ezc/src/patternparser.cpp

1296 lines
22 KiB
C++

/*
* This file is a part of EZC -- Easy templating in C++ library
* and is distributed under the BSD 3-Clause licence.
* Author: Tomasz Sowa <t.sowa@ttmath.org>
*/
/*
* Copyright (c) 2007-2021, Tomasz Sowa
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* * Neither the name Tomasz Sowa nor the names of contributors to this
* project may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
* THE POSSIBILITY OF SUCH DAMAGE.
*/
#include "patternparser.h"
#include "convert/convert.h"
namespace Ezc
{
/*
 * Creates a parser with its default settings:
 * - the [include] directive is allowed,
 * - no Blocks container is attached,
 * - at most 100 levels of nested includes,
 * - text items consisting only of white characters are kept,
 * - program mode is off,
 * - no logger is attached.
 */
PatternParser::PatternParser()
{
    allow_include = true;
    pblocks = nullptr;
    include_level_max = 100;
    delete_white_text_items = false;
    program_mode = false;
    log = nullptr;

    // The parsing state gets a defined value as well: ParseFile() goes
    // straight to CreateTreeReadIncludeSkipAllowFlag(), which copies the
    // current itext before anything has been assigned to it.
    // NOTE(review): redundant if the header already has default member
    // initializers for these two - confirm.
    itext = nullptr;
    include_level = 0;
}
/*
 * Sets both template directories from UTF-8 encoded C strings.
 * A null pointer leaves the corresponding directory empty.
 */
void PatternParser::Directory(const char * dir, const char * dir2)
{
    directory.clear();

    if( dir != nullptr )
        pt::utf8_to_wide(dir, directory);

    directory2.clear();

    if( dir2 != nullptr )
        pt::utf8_to_wide(dir2, directory2);
}
/*
 * Sets the first template directory from a UTF-8 encoded string,
 * the second directory becomes empty.
 */
void PatternParser::Directory(const std::string & dir)
{
    directory2.clear();
    pt::utf8_to_wide(dir, directory);
}
/*
 * Sets both template directories from UTF-8 encoded strings.
 */
void PatternParser::Directory(const std::string & dir, const std::string & dir2)
{
    // the two conversions are independent of each other
    pt::utf8_to_wide(dir2, directory2);
    pt::utf8_to_wide(dir, directory);
}
/*
 * Sets both template directories from wide C strings.
 * A null pointer leaves the corresponding directory empty.
 */
void PatternParser::Directory(const wchar_t * dir, const wchar_t * dir2)
{
    directory.clear();
    directory2.clear();

    if( dir != nullptr )
        directory.assign(dir);

    if( dir2 != nullptr )
        directory2.assign(dir2);
}
/*
 * Sets the first template directory from a wide string,
 * the second directory becomes empty.
 */
void PatternParser::Directory(const std::wstring & dir)
{
    // 'dir' is copied before directory2 is emptied (order kept on purpose)
    directory.assign(dir);
    directory2.clear();
}
/*
 * Sets both template directories from wide strings.
 */
void PatternParser::Directory(const std::wstring & dir, const std::wstring & dir2)
{
    directory.assign(dir);
    directory2.assign(dir2);
}
/*
 * Attaches the Blocks container into which [block] definitions are inserted
 * (see CreateTreeReadBlock()). Only the address is stored, so 'blocks'
 * has to outlive the parsing.
 */
void PatternParser::SetBlocks(Blocks & blocks)
{
    this->pblocks = &blocks;
}
/*
 * Sets the strings put before and after messages generated by CreateMsg(),
 * given as UTF-8 encoded C strings.
 *
 * A null pointer makes the corresponding string empty and is not forwarded
 * to the converter - the same convention as in Directory(const char *, const char *).
 */
void PatternParser::SetCommentary(const char * com_start, const char * com_stop)
{
    if( com_start )
        pt::utf8_to_wide(com_start, commentary_start);
    else
        commentary_start.clear();

    if( com_stop )
        pt::utf8_to_wide(com_stop, commentary_stop);
    else
        commentary_stop.clear();
}
/*
 * Sets the strings put before and after messages generated by CreateMsg(),
 * given as UTF-8 encoded strings.
 */
void PatternParser::SetCommentary(const std::string & com_start, const std::string & com_stop)
{
    // the two conversions are independent of each other
    pt::utf8_to_wide(com_stop, commentary_stop);
    pt::utf8_to_wide(com_start, commentary_start);
}
/*
 * Sets the strings put before and after messages generated by CreateMsg(),
 * given as wide C strings.
 *
 * A null pointer makes the corresponding string empty: assigning a null
 * pointer to std::wstring is undefined behaviour, and
 * Directory(const wchar_t *, const wchar_t *) accepts null pointers as well.
 */
void PatternParser::SetCommentary(const wchar_t * com_start, const wchar_t * com_stop)
{
    if( com_start )
        commentary_start = com_start;
    else
        commentary_start.clear();

    if( com_stop )
        commentary_stop = com_stop;
    else
        commentary_stop.clear();
}
/*
 * Sets the strings put before and after messages generated by CreateMsg(),
 * given as wide strings.
 */
void PatternParser::SetCommentary(const std::wstring & com_start, const std::wstring & com_stop)
{
    commentary_start.assign(com_start);
    commentary_stop.assign(com_stop);
}
/*
 * Switches between the two input syntaxes:
 * true  - items are read by CreateTreeReadExpression() (expressions, '{' '}', if/while),
 * false - items are read as text and [directives].
 */
void PatternParser::SetProgramMode(bool program_mode)
{
    // the parameter hides the member of the same name
    this->program_mode = program_mode;
}
/*
 * Attaches a logger used for diagnostic messages.
 * A null pointer switches logging off (every use is guarded by a null test).
 */
void PatternParser::SetLogger(pt::Log * log)
{
    // the parameter hides the member of the same name
    this->log = log;
}
/*
 * Builds a message in the form:
 *   commentary_start "Ezc: " type [' ' arg] commentary_stop
 *
 * out  - receives the message (its previous content is replaced)
 * type - kind of the message
 * arg  - optional argument, can be a null pointer
 */
void PatternParser::CreateMsg(std::wstring & out, const wchar_t * type, const wchar_t * arg)
{
    out.assign(commentary_start);
    out.append(L"Ezc: ");
    out.append(type);

    if( arg != nullptr )
    {
        out.push_back(L' ');
        out.append(arg);
    }

    out.append(commentary_stop);
}
/*
 * Parses a template file, the name is given as a UTF-8 encoded string.
 * Forwards to the const char * overload.
 */
void PatternParser::ParseFile(const std::string & file_name, Pattern & pattern)
{
    const char * utf8_name = file_name.c_str();
    ParseFile(utf8_name, pattern);
}
/*
 * Parses a template file, the name is given as a UTF-8 encoded C string.
 *
 * The name is stored in the root item of 'pattern' and the file is read
 * in the same way as an included file, but without testing the
 * allow_include flag.
 */
void PatternParser::ParseFile(const char * file_name, Pattern & pattern)
{
    include_level = 0;
    pat = &pattern;

    auto & root = pat->item_root;
    pt::utf8_to_wide(file_name, root.file_name);
    CreateTreeReadIncludeSkipAllowFlag(root);
}
/*
 * Parses a template file, the name is given as a wide string.
 * Forwards to the const wchar_t * overload.
 */
void PatternParser::ParseFile(const std::wstring & file_name, Pattern & pattern)
{
    const wchar_t * wide_name = file_name.c_str();
    ParseFile(wide_name, pattern);
}
/*
 * Parses a template file, the name is given as a wide C string.
 *
 * The name is stored in the root item of 'pattern' and the file is read
 * in the same way as an included file, but without testing the
 * allow_include flag.
 */
void PatternParser::ParseFile(const wchar_t * file_name, Pattern & pattern)
{
    include_level = 0;
    pat = &pattern;

    auto & root = pat->item_root;
    root.file_name = file_name;
    CreateTreeReadIncludeSkipAllowFlag(root);
}
void PatternParser::ParseString(const char * str, Pattern & pattern)
{
pt::utf8_to_wide(str, string_content);
ParseString(string_content.c_str(), pattern);
string_content.clear();
}
/*
 * Parses a template given directly as a UTF-8 encoded string.
 * Forwards to the const char * overload.
 */
void PatternParser::ParseString(const std::string & str, Pattern & pattern)
{
    const char * utf8_text = str.c_str();
    ParseString(utf8_text, pattern);
}
/*
 * Parses a template given directly as a wide C string.
 *
 * The root item of 'pattern' is cleared and filled as a container with the
 * items read from 'str'. 'str' is only read while this method is running.
 */
void PatternParser::ParseString(const wchar_t * str, Pattern & pattern)
{
    include_level = 0;
    itext = str;
    pat = &pattern;

    auto & root = pat->item_root;
    root.Clear();
    CreateTreeContainer(root);
}
/*
 * Parses a template given directly as a wide string.
 * Forwards to the const wchar_t * overload.
 */
void PatternParser::ParseString(const std::wstring & str, Pattern & pattern)
{
    const wchar_t * wide_text = str.c_str();
    ParseString(wide_text, pattern);
}
void PatternParser::AllowInclude(bool allow)
{
allow_include = allow;
}
void PatternParser::DeleteWhiteTextItems(bool del)
{
delete_white_text_items = del;
}
void PatternParser::SetIncludeMax(int include_max)
{
include_level_max = include_max;
}
/*
 * Returns true if the first component of 'path' is exactly 'file',
 * which means 'path' starts with 'file' and directly after it there is
 * the end of the string, a slash or a backslash.
 */
bool PatternParser::HasFileAtBeginning(const wchar_t * path, const wchar_t * file)
{
    while( *path != 0 && *file != 0 )
    {
        if( *path != *file )
            return false;

        ++path;
        ++file;
    }

    // 'path' ended before the whole 'file' was matched
    if( *file != 0 )
        return false;

    // "\" is from a dos path syntax
    return *path == 0 || *path == '\\' || *path == '/';
}
/*
 * ".." is not allowed as any component of the file path,
 * you cannot go up from your template directory.
 *
 * Returns false if such a component has been found.
 */
bool PatternParser::IsFileCorrect(const wchar_t * name)
{
    const wchar_t * component = name;

    while( *component != 0 )
    {
        if( HasFileAtBeginning(component, L"..") )
            return false;

        // moving to the next slash or backslash (or to the end of the string)
        for( ; *component != 0 && *component != '\\' && *component != '/' ; ++component)
        {
        }

        // stepping over the slash (or backslash) itself
        if( *component != 0 )
            ++component;
    }

    return true;
}
/*
 * Reads a template file into 'result', the name is given as a wide string.
 *
 * 'name' must be a relative path (relative to the template directories).
 * Forwards to the const wchar_t * overload.
 */
void PatternParser::ReadFile(const std::wstring & name, std::wstring & result)
{
    const wchar_t * raw_name = name.c_str();
    ReadFile(raw_name, result);
}
/*
 * Reads a template file into 'result'.
 *
 * 'name' must be a relative path (relative to the template directories).
 * The file is looked for in 'directory' first and in 'directory2' next.
 * If the name is rejected by IsFileCorrect() or the file cannot be opened
 * from either directory, 'result' receives a message created by CreateMsg().
 */
void PatternParser::ReadFile(const wchar_t * name, std::wstring & result)
{
    if( !IsFileCorrect(name) )
    {
        CreateMsg(result, L"incorrect file name: ", name);
        return;
    }

    result.clear();

    // the second directory is tried only when the first one has failed
    const bool loaded = ReadFileFromDir(directory, name, result) ||
                        ReadFileFromDir(directory2, name, result);

    if( !loaded )
        CreateMsg(result, L"can't open: ", name);
}
bool PatternParser::ReadFileFromDir(const std::wstring & dir, const wchar_t * name, std::wstring & result)
{
if( dir.empty() )
return false;
file_name = dir;
file_name += '/';
file_name += name;
pt::wide_to_utf8(file_name, afile_name);
std::ifstream file(afile_name.c_str());
if( !file )
{
file_name.clear();
afile_name.clear();
return false;
}
if( log )
{
if( include_level <= 1 )
(*log) << pt::Log::log4 << "Ezc: reading pattern: " << afile_name << pt::Log::logend;
else
(*log) << pt::Log::log4 << " including pattern: " << afile_name << pt::Log::logend;
}
ReadFile(file, result);
file_name.clear();
afile_name.clear();
return true;
}
/*
 * Reads an already opened stream into 'result' through pt::utf8_to_wide()
 * (the content of the file is expected to be UTF-8 encoded).
 */
void PatternParser::ReadFile(std::ifstream & file, std::wstring & result)
{
    pt::utf8_to_wide(file, result);
}
/*
 * Reads one character of a plain text item and moves itext forward.
 *
 * Returns -1 (without moving) at the end of the input or at '[' which
 * starts a directive. The sequences \\ \[ and \] are escapes: the backslash
 * is dropped and the second character is returned. A backslash before
 * any other character is returned as it is.
 */
int PatternParser::ReadCharInText()
{
    const wchar_t current = *itext;

    if( current == 0 || current == '[' )
        return -1;

    if( current == '\\' )
    {
        const wchar_t next = itext[1];

        if( next == '\\' || next == '[' || next == ']' )
            itext += 1;
    }

    const wchar_t result = *itext;
    itext += 1;

    return result;
}
/*
 * Returns true if 'c' is treated as a white character by the parser.
 */
bool PatternParser::IsWhite(wchar_t c)
{
    switch( c )
    {
    case ' ':
    case '\t':
    case 13:  // \r - from a dos file at the end of a line (\r\n)
    case 160: // a non-breaking space
    case 10:  // \n
        return true;

    default:
        return false;
    }
}
/*
 * Moves itext to the first character which is not white (see IsWhite()).
 */
void PatternParser::SkipWhite()
{
    for( ; IsWhite(*itext) ; ++itext)
    {
    }
}
/*
 * Skips the rest of the current [statement]: itext is moved just after
 * the ']' which closes it (nested brackets are counted) or to the end
 * of the input if there is no such bracket.
 */
void PatternParser::SkipOneStatement()
{
    // we are already inside one opened bracket
    size_t depth = 1;

    for( ; *itext != 0 && depth > 0 ; ++itext)
    {
        const wchar_t c = *itext;

        if( c == '[' )
            depth += 1;
        else
        if( c == ']' )
            depth -= 1;
    }
}
/*
 * Empties 's' if the whole string consists of white characters,
 * otherwise the string is left untouched.
 */
void PatternParser::CheckWhiteAndDelete(std::wstring & s)
{
    for(wchar_t c : s)
    {
        // one non-white character is enough to keep the string
        if( !IsWhite(c) )
            return;
    }

    s.clear();
}
/*
 * If the name of the function consists only of decimal digits
 * its value is stored in function.arg, otherwise function.arg is not touched.
 */
void PatternParser::CheckFunctionIsNumber(Item::Function & function)
{
    if( !IsPositiveNumber(function.name) )
        return;

    function.arg = wcstol(function.name.c_str(), nullptr, 10);
}
/*
 * Returns true if 'c' can be a part of a function (or directive) name:
 * an ASCII letter, a digit or one of: _ - . # ?
 */
bool PatternParser::IsNameChar(wchar_t c)
{
    if( c >= 'a' && c <= 'z' )
        return true;

    if( c >= 'A' && c <= 'Z' )
        return true;

    if( c >= '0' && c <= '9' )
        return true;

    switch( c )
    {
    case '_':
    case '-':
    case '.':
    case '#':
    case '?':
        return true;

    default:
        return false;
    }
}
/*
 * Returns true if 'c' is an ASCII decimal digit.
 */
bool PatternParser::IsDigit(wchar_t c)
{
    return !(c < '0' || c > '9');
}
/*
 * Returns true if 'str' is a non-empty sequence of ASCII decimal digits.
 *
 * An empty string is not a number: previously it was reported as one,
 * which let CheckFunctionIsNumber() store 0 as the value of a nameless
 * function.
 */
bool PatternParser::IsPositiveNumber(const std::wstring & str)
{
    if( str.empty() )
        return false;

    for(wchar_t c : str)
    {
        if( !IsDigit(c) )
            return false;
    }

    return true;
}
/*
 * Reads an expression name or a function name: the longest sequence of
 * name characters (see IsNameChar()) beginning at itext.
 * Returns true if at least one character has been read.
 */
bool PatternParser::ReadName(std::wstring & name)
{
    name.clear();

    for( ; IsNameChar(*itext) ; ++itext)
        name.push_back(*itext);

    return !name.empty();
}
/*
 * Reads a string in quotes (white characters before it are skipped).
 *
 * A string can have a quote character escaped with a backslash,
 * e.g. "sample text \"with quotes\"", use \\ to insert one backslash.
 * A backslash before any other character is taken literally.
 *
 * Returns true if the string has been read together with its closing quote.
 * Returns false if there is no opening quote (nothing is consumed beside
 * the white characters) or if the input ends before the closing quote.
 */
bool PatternParser::ReadString(std::wstring & str)
{
    str.clear();
    SkipWhite();

    // a string is recognized by its first quote character (")
    if( *itext != '\"' )
        return false;

    itext += 1;

    while( *itext != 0 && *itext != '\"' )
    {
        const bool is_escape = itext[0] == '\\' && (itext[1] == '\"' || itext[1] == '\\');

        if( is_escape )
        {
            // only the escaped character goes to the output
            str += itext[1];
            itext += 2;
        }
        else
        {
            str += itext[0];
            itext += 1;
        }
    }

    // the input has ended without the closing quote
    if( *itext != '\"' )
        return false;

    itext += 1;
    return true;
}
/*
 * Adds a new parameter to 'function' and reads a quoted string into it.
 * The parameter is marked as not being a function.
 * Returns the result of ReadString(); the parameter stays added
 * even if the string was not read correctly.
 */
bool PatternParser::ReadParamString(Item::Function & function)
{
    Item::Function & param = function.AddNewParam();
    param.is_function = false;

    return ReadString(param.name);
}
/*
 * Reads the parameters of 'function' up to (but not including) the
 * closing ']' or the end of the input.
 *
 * A parameter can be:
 * - a nested function in brackets, e.g. [fun arg1 arg2],
 * - a string in quotes,
 * - a name (a function without parameters).
 *
 * Returns true if all parameters have been read correctly,
 * reading stops on the first incorrect parameter.
 */
bool PatternParser::ReadParams(Item::Function & function)
{
    for(;;)
    {
        bool param_ok;
        SkipWhite();

        if( *itext == '[' )
        {
            param_ok = ReadFunction(function.AddNewParam(), true);
        }
        else
        if( *itext == '\"' )
        {
            param_ok = ReadParamString(function);
        }
        else
        if( *itext != ']' && *itext != 0 )
        {
            param_ok = ReadFunction(function.AddNewParam(), false);
        }
        else
        {
            // *itext is equal to ']' or it is the end of the input
            return true;
        }

        if( !param_ok )
            return false;
    }
}
/*
 * Reads a function into 'function' (which is cleared first).
 *
 * Two forms are recognized (white characters at the beginning are skipped):
 * - [name:postfix params...] - a function in brackets, it is read
 *   recursively with parameters and the closing ']' is consumed; if the
 *   bracket is missing the rest of the statement is skipped and the
 *   function is treated as incorrect,
 * - name:postfix params...   - a bare name with an optional postfix after
 *   a colon; parameters are read only if with_params is true.
 *
 * function.is_function is set to the returned value.
 * Returns true if the function has been read correctly.
 */
bool PatternParser::ReadFunction(Item::Function & function, bool with_params)
{
    bool ok;

    SkipWhite();
    function.Clear();

    if( *itext != '[' )
    {
        ok = ReadName(function.name);

        if( ok && *itext == ':' )
        {
            itext += 1;
            ReadName(function.postfix); // we allow the postfix to be empty
        }

        if( ok && with_params )
            ok = ReadParams(function);
    }
    else
    {
        itext += 1;
        ok = ReadFunction(function, true);
        SkipWhite();

        if( *itext == ']' )
        {
            itext += 1;
        }
        else
        {
            SkipOneStatement();
            ok = false;
        }
    }

    if( ok )
    {
        // IMPROVE ME
        // this will be called more than once for nested functions
        CheckFunctionIsNumber(function);
    }

    function.is_function = ok;
    return ok;
}
/*
 * Reads a function with its parameters into item.function.
 * On failure the function is cleared and the item becomes item_err.
 *
 * Returns true if the function has been correctly read.
 */
bool PatternParser::ReadFunction(Item & item)
{
    if( ReadFunction(item.function, true) )
        return true;

    item.function.Clear();
    item.type = Item::item_err;

    return false;
}
/*
 * Consumes the ']' which closes a directive (white characters before it
 * are skipped). If there is something else the item becomes item_err,
 * its function is cleared and the rest of the statement is skipped.
 */
void PatternParser::CreateTreeReadItemDirectiveCheckEnding(Item & item)
{
    SkipWhite();

    if( *itext == ']' )
    {
        itext += 1;
        return;
    }

    item.type = Item::item_err;
    item.function.Clear();
    SkipOneStatement();
}
/*
 * Reads a statement which is not a directive, e.g. [function param1 param2].
 * The item becomes item_function, or item_err (with the function cleared)
 * if the function cannot be read. has_function is set in both cases.
 */
void PatternParser::ReadNormalStatement(Item & item)
{
    item.has_function = true;
    item.type = Item::item_function;

    const bool read_ok = ReadFunction(item.function, true);

    if( read_ok )
        return;

    item.type = Item::item_err;
    item.function.Clear();
}
/*
 * [if function ...]
 * The type is set first: ReadFunction() changes it to item_err on failure.
 */
void PatternParser::ReadDirectiveIf(Item & item)
{
    item.type = Item::item_if;

    // the result is not needed, an error is recorded in the item itself
    static_cast<void>(ReadFunction(item));
}
/*
 * [end]
 * Only the type is set, nothing more is read from the input here.
 */
void PatternParser::ReadDirectiveEnd(Item & item)
{
    item.type = Item::item_end;
}
/*
 * [else]
 * Only the type is set, nothing more is read from the input here.
 */
void PatternParser::ReadDirectiveElse(Item & item)
{
    item.type = Item::item_else;
}
/*
 * [for function ...]
 * The type is set first: ReadFunction() changes it to item_err on failure.
 */
void PatternParser::ReadDirectiveFor(Item & item)
{
    item.type = Item::item_for;

    // the result is not needed, an error is recorded in the item itself
    static_cast<void>(ReadFunction(item));
}
/*
 * [# a comment]
 * The comment is skipped up to the first ']' (which is not consumed here)
 * or to the end of the input. Nested brackets are not recognized.
 */
void PatternParser::ReadDirectiveComment(Item & item)
{
    item.type = Item::item_comment;

    for( ; *itext != 0 && *itext != ']' ; ++itext)
    {
    }
}
/*
 * [include "file_name"]
 * The file name is read into item.file_name, the item becomes item_err
 * if there is no correct string in quotes.
 */
void PatternParser::ReadDirectiveInclude(Item & item)
{
    const bool name_ok = ReadString(item.file_name);

    item.type = name_ok ? Item::item_include : Item::item_err;
}
/*
 * [def variable value]
 * At most one parameter is allowed.
 *
 * With more parameters the item becomes item_err and its function is
 * cleared, in the same way as in the other directives ([let], [block],
 * [frame]) so that an error item never carries a half-read function.
 */
void PatternParser::ReadDirectiveDef(Item & item)
{
    item.type = Item::item_def;

    // on failure ReadFunction() has already set item_err and cleared the function
    if( !ReadFunction(item) )
        return;

    if( item.function.parameters.size() > 1 )
    {
        item.type = Item::item_err;
        item.function.Clear();
    }
}
/*
 * [def? variable value]
 * At most one parameter is allowed.
 *
 * With more parameters the item becomes item_err and its function is
 * cleared, in the same way as in the other directives ([let?], [block],
 * [frame]) so that an error item never carries a half-read function.
 */
void PatternParser::ReadDirectiveDefIfNotSet(Item & item)
{
    item.type = Item::item_def_if_not_set;

    // on failure ReadFunction() has already set item_err and cleared the function
    if( !ReadFunction(item) )
        return;

    if( item.function.parameters.size() > 1 )
    {
        item.type = Item::item_err;
        item.function.Clear();
    }
}
/*
 * [let variable value]
 * At most one parameter is allowed.
 *
 * If the parameter is a function, e.g. [let variable function_name],
 * then the function cannot have parameters itself (it is not evaluated
 * here, this is only an alias).
 *
 * If either rule is broken the item becomes item_err and its function
 * is cleared.
 */
void PatternParser::ReadDirectiveLet(Item & item)
{
    item.type = Item::item_let;

    if( !ReadFunction(item) )
        return;

    const std::vector<Item::Function*> & params = item.function.parameters;

    const bool too_many_params = params.size() > 1;
    const bool alias_with_params = params.size() == 1 &&
                                   params[0]->is_function &&
                                   !params[0]->parameters.empty();

    if( too_many_params || alias_with_params )
    {
        item.type = Item::item_err;
        item.function.Clear();
    }
}
/*
 * [let? variable value]
 * At most one parameter is allowed.
 *
 * If the parameter is a function, e.g. [let? variable function_name],
 * then the function cannot have parameters itself (it is not evaluated
 * here, this is only an alias).
 *
 * If either rule is broken the item becomes item_err and its function
 * is cleared.
 */
void PatternParser::ReadDirectiveLetIfNotSet(Item & item)
{
    item.type = Item::item_let_if_not_set;

    if( !ReadFunction(item) )
        return;

    const std::vector<Item::Function*> & params = item.function.parameters;

    const bool too_many_params = params.size() > 1;
    const bool alias_with_params = params.size() == 1 &&
                                   params[0]->is_function &&
                                   !params[0]->parameters.empty();

    if( too_many_params || alias_with_params )
    {
        item.type = Item::item_err;
        item.function.Clear();
    }
}
/*
 * [filter function ...]
 * The type is set first: ReadFunction() changes it to item_err on failure.
 */
void PatternParser::ReadDirectiveFilter(Item & item)
{
    item.type = Item::item_filter;

    // the result is not needed, an error is recorded in the item itself
    static_cast<void>(ReadFunction(item));
}
/*
 * [ezc function ...]
 * The type is set first: ReadFunction() changes it to item_err on failure.
 */
void PatternParser::ReadDirectiveEzc(Item & item)
{
    item.type = Item::item_ezc;

    // the result is not needed, an error is recorded in the item itself
    static_cast<void>(ReadFunction(item));
}
/*
 * [frame ...] is a shorthand for [ezc frame ...]
 *
 * The function name "frame" is put in by hand and only the parameters
 * are read from the input. If they are incorrect the item becomes
 * item_err and its function is cleared.
 */
void PatternParser::ReadDirectiveFrame(Item & item)
{
    Item::Function & frame = item.function;

    item.type = Item::item_ezc;
    item.has_function = true;

    frame.Clear();
    frame.name = L"frame";
    frame.is_function = true;

    if( ReadParams(frame) )
        return;

    item.type = Item::item_err;
    frame.Clear();
}
/*
 * [block name]
 * Only one function without arguments is allowed, otherwise the item
 * becomes item_err and its function is cleared.
 */
void PatternParser::ReadDirectiveBlock(Item & item)
{
    item.type = Item::item_block;

    if( !ReadFunction(item) )
        return;

    const bool has_arguments = !item.function.parameters.empty();

    if( has_arguments )
    {
        item.type = Item::item_err;
        item.function.Clear();
    }
}
/*
 * [return function ...]
 * The type is set first: ReadFunction() changes it to item_err on failure.
 */
void PatternParser::ReadDirectiveReturn(Item & item)
{
    item.type = Item::item_return;

    // the result is not needed, an error is recorded in the item itself
    static_cast<void>(ReadFunction(item));
}
/*
 * Reads one [directive] or [statement], itext points at the opening '['.
 *
 * The first name after the bracket selects a directive. If it is not
 * a known directive (or there is a nested '[' instead of a name) the input
 * is rewound to the place just before the name and the whole content is
 * read as a normal statement. At the end the closing ']' is verified.
 */
void PatternParser::CreateTreeReadItemDirective(Item & item)
{
    std::wstring directive;

    itext += 1; // stepping over the opening '['
    SkipWhite();

    const wchar_t * const name_begin = itext;
    ReadName(directive);

    if( directive == L"if" )
        ReadDirectiveIf(item);
    else
    if( directive == L"end" )
        ReadDirectiveEnd(item);
    else
    if( directive == L"else" )
        ReadDirectiveElse(item);
    else
    if( directive == L"for" )
        ReadDirectiveFor(item);
    else
    if( directive == L"include" )
        ReadDirectiveInclude(item);
    else
    if( directive == L"def" )
        ReadDirectiveDef(item);
    else
    if( directive == L"def?" )
        ReadDirectiveDefIfNotSet(item);
    else
    if( directive == L"let" )
        ReadDirectiveLet(item);
    else
    if( directive == L"let?" )
        ReadDirectiveLetIfNotSet(item);
    else
    if( directive == L"filter" )
        ReadDirectiveFilter(item);
    else
    if( directive == L"ezc" )
        ReadDirectiveEzc(item);
    else
    if( directive == L"frame" )
        ReadDirectiveFrame(item);
    else
    if( directive == L"block" )
        ReadDirectiveBlock(item);
    else
    if( directive == L"return" )
        ReadDirectiveReturn(item);
    else
    if( directive == L"#" )
        ReadDirectiveComment(item);
    else
    if( *name_begin == '[' || !directive.empty() )
    {
        // not a directive: the name belongs to a normal statement
        itext = name_begin;
        ReadNormalStatement(item);
    }

    CreateTreeReadItemDirectiveCheckEnding(item);
}
/*
 * Reads a plain text item: all characters up to the next directive
 * or to the end of the input are appended to item.text (escapes are
 * resolved by ReadCharInText()). If delete_white_text_items is set,
 * a text consisting only of white characters is emptied.
 */
void PatternParser::CreateTreeReadItemText(Item & item)
{
    for(;;)
    {
        const int c = ReadCharInText();

        if( c == -1 )
            break;

        item.text += static_cast<wchar_t>(c);
    }

    if( delete_white_text_items )
        CheckWhiteAndDelete(item.text);

    item.type = Item::item_text;
}
/*
 * Reads an expression (program mode) into item.text.
 *
 * is_statement == true:  we are just after the '(' of an 'if' or 'while',
 *                        reading ends after the matching ')'
 *                        (which is not put to the text),
 * is_statement == false: a normal expression, reading ends after a ';'
 *                        met outside of any round brackets
 *                        (the ';' is not put to the text).
 *
 * In both cases reading also ends at the end of the input. New line
 * characters are changed to spaces and a sequence of white characters
 * is squeezed into its first character.
 */
void PatternParser::CreateTreeReadDirectiveExpression(Item & item, bool is_statement)
{
    // a statement starts with its opening bracket already consumed
    int depth = is_statement ? 1 : 0;

    for( ; *itext != 0 ; ++itext)
    {
        wchar_t c = *itext;

        if( c == 10 || c == 13 )
            c = ' ';

        if( c == ';' && depth == 0 )
        {
            ++itext;
            return; // end of normal expression (not in a statement such as 'for' or 'if')
        }

        if( c == '(' )
        {
            depth += 1;
        }
        else
        if( c == ')' )
        {
            depth -= 1;

            if( is_statement && depth == 0 )
            {
                ++itext;
                return; // end of statement expression
            }
        }

        const bool repeated_white = IsWhite(c) && !item.text.empty() && IsWhite(item.text.back());

        if( !repeated_white )
            item.text += c;
    }
}
/*
 * Checks (in program mode) whether the input at itext starts with
 * 'if (' or 'while (' - white characters before the bracket are allowed.
 * The keywords are tested with pt::is_substr_nc()
 * (NOTE(review): presumably a case insensitive prefix test - confirm).
 *
 * On success the keyword and the opening bracket are consumed, item.type
 * is set to item_if or item_for and true is returned.
 * Otherwise itext and item are left untouched and false is returned.
 *
 * Each keyword is tested at the original position: formerly an 'if'
 * without its bracket left itext moved forward, so an input such as
 * "if while(" was wrongly recognized as a 'while' statement.
 */
bool PatternParser::CreateTreeCheckProgramDirective(Item & item)
{
    const wchar_t * old_itext = itext;

    if( pt::is_substr_nc(L"if", itext) )
    {
        itext += 2;
        SkipWhite();

        if( *itext == '(' )
        {
            itext += 1;
            item.type = Item::item_if;
            return true;
        }

        // not an 'if (' - the next keyword must be tested from the same place
        itext = old_itext;
    }

    if( pt::is_substr_nc(L"while", itext) )
    {
        itext += 5;
        SkipWhite();

        if( *itext == '(' )
        {
            itext += 1;
            item.type = Item::item_for;
            return true;
        }
    }

    itext = old_itext;
    return false;
}
/*
 * Reads one item in program mode (white characters before it are skipped):
 * - '{' opens a container (item_container),
 * - '}' closes it (item_end),
 * - 'if (' and 'while (' start a statement, its condition is read
 *   into item.text,
 * - anything else is an expression (item_function) read into item.text.
 *
 * Returns false at the end of the input.
 */
bool PatternParser::CreateTreeReadExpression(Item & item)
{
    SkipWhite();

    switch( *itext )
    {
    case 0:
        return false;

    case '{':
        item.type = Item::item_container;
        itext += 1;
        return true;

    case '}':
        item.type = Item::item_end;
        itext += 1;
        return true;

    default:
        break;
    }

    const bool is_statement = CreateTreeCheckProgramDirective(item);

    // for a statement the type has been already set to item_if or item_for
    if( !is_statement )
        item.type = Item::item_function;

    CreateTreeReadDirectiveExpression(item, is_statement);

    return true;
}
/*
 * Reads the next item from the input into 'item' (which is cleared first).
 *
 * In program mode the work is done by CreateTreeReadExpression().
 * Otherwise a '[' starts a directive and anything else is a plain text.
 *
 * Returns false at the end of the input.
 */
bool PatternParser::CreateTreeReadItem(Item & item)
{
    item.Clear();

    if( program_mode )
        return CreateTreeReadExpression(item);

    if( *itext == '[' )
    {
        CreateTreeReadItemDirective(item);
        return true;
    }

    if( *itext != 0 )
    {
        CreateTreeReadItemText(item);
        return true;
    }

    // the end of the string
    return false;
}
/*
 * Handles an [include] item: the file is read only if includes are
 * allowed (see AllowInclude()), otherwise the fact is only logged.
 */
void PatternParser::CreateTreeReadInclude(Item & item)
{
    if( allow_include )
    {
        CreateTreeReadIncludeSkipAllowFlag(item);
        return;
    }

    if( log )
    {
        pt::Log & out = *log;
        out << pt::Log::log2 << "Ezc: \"include\" directive is not allowed" << pt::Log::logend;
    }
}
/*
 * Reads the file item.file_name and parses it into 'item' as a container,
 * the allow_include flag is not tested here.
 *
 * Nothing is done if the file name is empty or the limit of nested
 * includes has been exceeded (the latter is logged).
 *
 * While the file is being parsed itext points into a local buffer,
 * afterwards it is restored so the caller continues in its own text.
 */
void PatternParser::CreateTreeReadIncludeSkipAllowFlag(Item & item)
{
    if( item.file_name.empty() )
        return;

    if( include_level > include_level_max )
    {
        if( log )
        {
            pt::Log & out = *log;
            out << pt::Log::log1 << "Ezc: \"include\" directive has reached the maximum level" << pt::Log::logend;
        }

        return;
    }

    include_level += 1;

    // this temporary object must not be global (includes can be nested)
    std::wstring content;
    ReadFile(item.file_name, content);

    const wchar_t * const saved_itext = itext;
    itext = content.c_str();

    // the file name has been already used so the item can be cleared now
    item.Clear();
    CreateTreeContainer(item);

    itext = saved_itext;
    include_level -= 1;
}
/*
 * Reads the body of an 'if' item.
 *
 * The first child of 'item' is the branch executed when the condition
 * is true, an optional second child is the 'else' branch.
 *
 * program mode:  each branch is a single item read by CreateTree(),
 *                the second one is read if the 'else' keyword follows,
 * template mode: each branch is a container; the first one ends with
 *                [else] or [end], the terminating item is removed.
 */
void PatternParser::CreateTreeReadIf(Item & item)
{
    Item * branch = item.AddItem();
    branch->Clear();

    if( program_mode )
    {
        CreateTree(*branch);
        SkipWhite();

        if( pt::is_substr_nc(L"else", itext) )
        {
            itext += 4;
            branch = item.AddItem();
            CreateTree(*branch);
        }
    }
    else
    {
        CreateTreeContainer(*branch);

        if( branch->LastItemType() == Item::item_else )
        {
            branch->DeleteLastItem();
            branch = item.AddItem();
            CreateTreeContainer(*branch);
        }
    }

    // the closing [end] (or '}') is not a part of the last branch
    if( branch->LastItemType() == Item::item_end )
        branch->DeleteLastItem();
}
/*
 * Reads the body of a [block name] ... [end] definition.
 *
 * The body is not put to the tree: it is parsed into a temporary
 * container and inserted (under the name of the block) to the Blocks
 * object set by SetBlocks(). Without such an object the body is dropped.
 */
void PatternParser::CreateTreeReadBlock(Item & item)
{
    Item body;
    CreateTreeContainer(body);

    // the closing [end] is not a part of the block
    if( body.LastItemType() == Item::item_end )
        body.DeleteLastItem();

    if( pblocks != nullptr )
        pblocks->Insert(item.function.name, body);
}
/*
 * Reads the body of a 'for' item (also used for [filter] and [ezc]).
 *
 * The body becomes the only child of 'item': a single item in program
 * mode, a container otherwise. The closing [end] is removed from it.
 */
void PatternParser::CreateTreeReadFor(Item & item)
{
    Item * body = item.AddItem();

    if( !program_mode )
        CreateTreeContainer(*body);
    else
        CreateTree(*body);

    if( body->LastItemType() == Item::item_end )
        body->DeleteLastItem();
}
/*
 * Reads one complete item with all its children into 'item'.
 *
 * Comments and block definitions are consumed here and do not produce
 * an item: reading is repeated until something else has been found.
 * Then, depending on the type, the body of the item is read.
 *
 * Returns false if the end of the input has been reached before
 * any item was read.
 */
bool PatternParser::CreateTree(Item & item)
{
    for(;;)
    {
        if( !CreateTreeReadItem(item) )
            return false;

        if( item.type == Item::item_block )
            CreateTreeReadBlock(item);

        if( item.type != Item::item_comment && item.type != Item::item_block )
            break;
    }

    // such container can be read in program mode
    if( item.type == Item::item_container )
        CreateTreeContainer(item);

    if( item.type == Item::item_if )
        CreateTreeReadIf(item);

    // CHECK ME is it correct to check item_filter and item_ezc here and call CreateTreeReadFor?
    const bool has_for_like_body = item.type == Item::item_for ||
                                   item.type == Item::item_filter ||
                                   item.type == Item::item_ezc;

    if( has_for_like_body )
        CreateTreeReadFor(item);

    if( item.type == Item::item_include )
        CreateTreeReadInclude(item);

    return true;
}
/*
 * Makes 'item' a container and fills it with consecutive items.
 *
 * Reading stops:
 * - after an item_end or item_else item (which stays in the container
 *   as its last child, the caller decides what to do with it),
 * - at the end of the input (the child prepared for the item which
 *   could not be read is removed).
 */
void PatternParser::CreateTreeContainer(Item & item)
{
    item.type = Item::item_container;

    for(;;)
    {
        Item * child = item.AddItem();

        if( !CreateTree(*child) )
        {
            item.DeleteLastItem();
            return;
        }

        if( child->type == Item::item_end || child->type == Item::item_else )
            return;
    }
}
} // namespace Ezc