ezc/src/patternparser.cpp

1041 lines
18 KiB
C++

/*
* This file is a part of EZC -- Easy templating in C++ library
* and is distributed under the BSD 3-Clause licence.
* Author: Tomasz Sowa <t.sowa@ttmath.org>
*/
/*
* Copyright (c) 2007-2016, Tomasz Sowa
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* * Neither the name Tomasz Sowa nor the names of contributors to this
* project may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
* THE POSSIBILITY OF SUCH DAMAGE.
*/
#include "patternparser.h"
#ifdef EZC_USE_WINIX_LOGGER
#include "core/log.h"
#endif
namespace Ezc
{
/*
    creates a parser with the default configuration:
     - the "include" directive is allowed
     - no Blocks container is attached
     - the include nesting limit is 100
     - text items consisting only of white characters are kept

    the parsing state (pat, itext, include_level) is zeroed as well, so that
    no member holds an indeterminate value before the first Parse*() call
*/
PatternParser::PatternParser()
{
    // configuration
    allow_include           = true;
    pblocks                 = 0;
    include_level_max       = 100;
    delete_white_text_items = false;

    // parsing state, overwritten by every ParseFile()/ParseString() call
    pat           = 0;
    itext         = 0;
    include_level = 0;
}
/*
    sets both template directories from UTF-8 encoded C strings

    a null pointer leaves the corresponding directory empty
*/
void PatternParser::Directory(const char * dir, const char * dir2)
{
    // start from a clean state, a null argument means "no directory"
    directory.clear();
    directory2.clear();

    if( dir != 0 )
    {
        PT::UTF8ToWide(dir, directory);
    }

    if( dir2 != 0 )
    {
        PT::UTF8ToWide(dir2, directory2);
    }
}
void PatternParser::Directory(const std::string & dir)
{
PT::UTF8ToWide(dir, directory);
directory2.clear();
}
void PatternParser::Directory(const std::string & dir, const std::string & dir2)
{
PT::UTF8ToWide(dir, directory);
PT::UTF8ToWide(dir2, directory2);
}
/*
    sets both template directories from wide C strings

    a null pointer leaves the corresponding directory empty
*/
void PatternParser::Directory(const wchar_t * dir, const wchar_t * dir2)
{
    // start from a clean state, a null argument means "no directory"
    directory.clear();
    directory2.clear();

    if( dir != 0 )
    {
        directory = dir;
    }

    if( dir2 != 0 )
    {
        directory2 = dir2;
    }
}
void PatternParser::Directory(const std::wstring & dir)
{
directory = dir;
directory2.clear();
}
void PatternParser::Directory(const std::wstring & dir, const std::wstring & dir2)
{
directory = dir;
directory2 = dir2;
}
/*
    attaches a Blocks container: only its address is stored,
    [block] definitions found while parsing are inserted into it
*/
void PatternParser::SetBlocks(Blocks & blocks)
{
    this->pblocks = &blocks;
}
/*
    sets the markers put before and after messages generated by the parser
    (see CreateMsg), both arguments are UTF-8 encoded C strings

    a null pointer leaves the corresponding marker empty
    (the same convention as in Directory(const char *, const char *))
*/
void PatternParser::SetCommentary(const char * com_start, const char * com_stop)
{
    commentary_start.clear();
    commentary_stop.clear();

    // null pointers are not passed to the converter
    if( com_start )
        PT::UTF8ToWide(com_start, commentary_start);

    if( com_stop )
        PT::UTF8ToWide(com_stop, commentary_stop);
}
void PatternParser::SetCommentary(const std::string & com_start, const std::string & com_stop)
{
PT::UTF8ToWide(com_start, commentary_start);
PT::UTF8ToWide(com_stop, commentary_stop);
}
/*
    sets the markers put before and after messages generated by the parser
    (see CreateMsg)

    a null pointer leaves the corresponding marker empty
    (the same convention as in Directory(const wchar_t *, const wchar_t *)),
    assigning a null pointer to std::wstring would be undefined behaviour
*/
void PatternParser::SetCommentary(const wchar_t * com_start, const wchar_t * com_stop)
{
    commentary_start.clear();
    commentary_stop.clear();

    if( com_start )
        commentary_start = com_start;

    if( com_stop )
        commentary_stop = com_stop;
}
void PatternParser::SetCommentary(const std::wstring & com_start, const std::wstring & com_stop)
{
commentary_start = com_start;
commentary_stop = com_stop;
}
/*
    builds a parser message in 'out' (the previous content is replaced):

       commentary_start + "Ezc: " + type [+ ' ' + arg] + commentary_stop

    'arg' is optional and can be a null pointer
*/
void PatternParser::CreateMsg(std::wstring & out, const wchar_t * type, const wchar_t * arg)
{
    out = commentary_start;
    out.append(L"Ezc: ");
    out.append(type);

    if( arg != 0 )
    {
        out.push_back(L' ');
        out.append(arg);
    }

    out.append(commentary_stop);
}
/*
    parses a template file, 'file_name' is UTF-8 encoded

    forwards to the const char * version
*/
void PatternParser::ParseFile(const std::string & file_name, Pattern & pattern)
{
    const char * utf8_name = file_name.c_str();
    ParseFile(utf8_name, pattern);
}
void PatternParser::ParseFile(const char * file_name, Pattern & pattern)
{
pat = &pattern;
PT::UTF8ToWide(file_name, pat->item_root.file_name);
include_level = 0;
CreateTreeReadIncludeSkipAllowFlag(pat->item_root);
}
/*
    parses a template file

    forwards to the const wchar_t * version
*/
void PatternParser::ParseFile(const std::wstring & file_name, Pattern & pattern)
{
    const wchar_t * wide_name = file_name.c_str();
    ParseFile(wide_name, pattern);
}
void PatternParser::ParseFile(const wchar_t * file_name, Pattern & pattern)
{
pat = &pattern;
pat->item_root.file_name = file_name;
include_level = 0;
CreateTreeReadIncludeSkipAllowFlag(pat->item_root);
}
void PatternParser::ParseString(const char * str, Pattern & pattern)
{
PT::UTF8ToWide(str, string_content);
ParseString(string_content.c_str(), pattern);
string_content.clear();
}
/*
    parses a template given as an UTF-8 encoded string

    forwards to the const char * version
*/
void PatternParser::ParseString(const std::string & str, Pattern & pattern)
{
    const char * utf8_text = str.c_str();
    ParseString(utf8_text, pattern);
}
void PatternParser::ParseString(const wchar_t * str, Pattern & pattern)
{
pat = &pattern;
itext = str;
include_level = 0;
CreateTree(pat->item_root);
}
/*
    parses a template given as a wide string

    forwards to the const wchar_t * version
*/
void PatternParser::ParseString(const std::wstring & str, Pattern & pattern)
{
    const wchar_t * wide_text = str.c_str();
    ParseString(wide_text, pattern);
}
void PatternParser::AllowInclude(bool allow)
{
allow_include = allow;
}
void PatternParser::DeleteWhiteTextItems(bool del)
{
delete_white_text_items = del;
}
void PatternParser::SetIncludeMax(int include_max)
{
include_level_max = include_max;
}
/*
    returns true if the first component of 'path' is exactly 'file'

    it means: 'path' starts with 'file' and directly after it there is
    the end of the string, a slash or a backslash
*/
bool PatternParser::HasFileAtBeginning(const wchar_t * path, const wchar_t * file)
{
    // comparing both strings as long as neither of them has finished
    while( *path != 0 && *file != 0 )
    {
        if( *path != *file )
            return false;

        ++path;
        ++file;
    }

    // 'path' was shorter than 'file'
    if( *file != 0 )
        return false;

    // 'file' has to be a whole path component
    // "\" is from a dos path syntax
    return ( *path == 0 || *path == '\\' || *path == '/' );
}
/*
    returns false if any component of the path is equal to ".."

    ".." is not allowed in the file path,
    you cannot go up from your template directory
*/
bool PatternParser::IsFileCorrect(const wchar_t * name)
{
    const wchar_t * component = name;

    while( *component != 0 )
    {
        if( HasFileAtBeginning(component, L"..") )
            return false;

        // looking for the next slash or backslash
        for( ; *component != 0 && *component != '\\' && *component != '/' ; ++component )
        {
        }

        // skipping the slash (or backslash), now we are at the next component
        if( *component != 0 )
            ++component;
    }

    return true;
}
/*
    reads a template file into 'result', forwards to the const wchar_t * version

    'name' must be a relative path (presumably: without a leading slash
    or backslash, this is not verified here)
*/
void PatternParser::ReadFile(const std::wstring & name, std::wstring & result)
{
    const wchar_t * wide_name = name.c_str();
    ReadFile(wide_name, result);
}
/*
    reads a template file into 'result'

    the file is looked for in 'directory' and then in 'directory2',
    if the name is rejected by IsFileCorrect() or the file cannot be opened
    then 'result' gets a message created by CreateMsg()

    'name' must be a relative path (presumably: without a leading slash
    or backslash, this is not verified here)
*/
void PatternParser::ReadFile(const wchar_t * name, std::wstring & result)
{
    if( !IsFileCorrect(name) )
    {
        CreateMsg(result, L"incorrect file name: ", name);
        return;
    }

    result.clear();

    // the second directory is tried only when the first one fails
    const bool was_read = ReadFileFromDir(directory,  name, result) ||
                          ReadFileFromDir(directory2, name, result);

    if( !was_read )
    {
        CreateMsg(result, L"can't open: ", name);
    }
}
/*
    tries to read the file dir + '/' + name into 'result'

    returns false if 'dir' is empty or the file cannot be opened

    the members file_name and afile_name are used as temporary buffers
    for the wide and UTF-8 path, they are emptied before returning
*/
bool PatternParser::ReadFileFromDir(const std::wstring & dir, const wchar_t * name, std::wstring & result)
{
    if( dir.empty() )
        return false;

    bool was_read = false;

    file_name  = dir;
    file_name += '/';
    file_name += name;
    PT::WideToUTF8(file_name, afile_name);

    std::ifstream file(afile_name.c_str());

    if( file )
    {
        #ifdef EZC_USE_WINIX_LOGGER
        if( include_level <= 1 )
            Winix::log << Winix::log3 << "Ezc: reading pattern: " << afile_name << Winix::logend;
        else
            Winix::log << Winix::log3 << " including pattern: " << afile_name << Winix::logend;
        #endif

        ReadFile(file, result);
        was_read = true;
    }

    file_name.clear();
    afile_name.clear();

    return was_read;
}
/*
    reads the content of an opened stream into 'result'
    with the help of PT::UTF8ToWide() (UTF-8 to wide characters)
*/
void PatternParser::ReadFile(std::ifstream & file, std::wstring & result)
{
    PT::UTF8ToWide(file, result);
    return;
}
/*
    reads one character of a plain text item and moves the reading pointer

    returns -1 (nothing is consumed) at the end of the input
    or at '[' which begins a directive

    the sequences \\ \[ and \] return the second character,
    a backslash before any other character is returned as it is
*/
int PatternParser::ReadCharInText()
{
    const wchar_t current = *itext;

    if( current == 0 || current == '[' )
        return -1;

    if( current == '\\' )
    {
        const wchar_t next = itext[1];

        // dropping the escaping backslash
        if( next == '\\' || next == '[' || next == ']' )
            ++itext;
    }

    const wchar_t c = *itext;
    ++itext;

    return c;
}
/*
    returns true for: a space, a tab, \r (13), \n (10)
    and a non-breaking space (160)
*/
bool PatternParser::IsWhite(wchar_t c)
{
    switch( c )
    {
    case ' ':
    case '\t':
    case 13:    // \r is from a dos file at the end of a line (\r\n)
    case 10:    // \n
    case 160:   // a non-breaking space
        return true;

    default:
        return false;
    }
}
/*
    moves the reading pointer to the first non-white character (see IsWhite)
*/
void PatternParser::SkipWhite()
{
    for( ; IsWhite(*itext) ; ++itext )
    {
    }
}
/*
    skips the rest of the current [...] statement

    we are inside one opened bracket (the depth starts from 1),
    nested brackets are balanced, the pointer stops just after
    the closing ']' or at the end of the input
*/
void PatternParser::SkipOneStatement()
{
    size_t depth = 1;

    for( ; *itext != 0 && depth > 0 ; ++itext )
    {
        if( *itext == '[' )
        {
            ++depth;
        }
        else
        if( *itext == ']' )
        {
            --depth;
        }
    }
}
/*
    empties 's' if it consists only of white characters (see IsWhite),
    otherwise the string is not changed
*/
void PatternParser::CheckWhiteAndDelete(std::wstring & s)
{
    if( s.empty() )
        return;

    for(size_t pos = 0 ; pos < s.size() ; ++pos)
    {
        // a non-white character: the string is left as it is
        if( !IsWhite(s[pos]) )
            return;
    }

    // the whole string consists of white characters
    s.clear();
}
/*
    if the function name consists only of decimal digits
    then its value is stored in function.arg
*/
void PatternParser::CheckFunctionIsNumber(Item::Function & function)
{
    if( !IsPositiveNumber(function.name) )
        return;

    const wchar_t * digits = function.name.c_str();
    function.arg = wcstol(digits, 0, 10);
}
/*
    returns true if 'c' can be a part of a name:
    an ascii letter, a digit or one of: _ - . #
*/
bool PatternParser::IsNameChar(wchar_t c)
{
    if( c >= 'a' && c <= 'z' )
        return true;

    if( c >= 'A' && c <= 'Z' )
        return true;

    if( c >= '0' && c <= '9' )
        return true;

    return ( c == '_' || c == '-' || c == '.' || c == '#' );
}
/*
    returns true if 'c' is an ascii decimal digit
*/
bool PatternParser::IsDigit(wchar_t c)
{
    if( c < '0' )
        return false;

    return c <= '9';
}
/*
    returns true if each character of 'str' is a decimal digit

    an empty string gives true as well (there is no wrong character in it)
*/
bool PatternParser::IsPositiveNumber(const std::wstring & str)
{
    std::wstring::const_iterator it = str.begin();

    while( it != str.end() && IsDigit(*it) )
        ++it;

    // we have reached the end if there were only digits
    return it == str.end();
}
/*
    reading an expression name or a function name

    consumes the longest run of name characters (see IsNameChar)
    returns false if there was no such a character ('name' is empty then)
*/
bool PatternParser::ReadName(std::wstring & name)
{
    name.clear();

    for( ; IsNameChar(*itext) ; ++itext )
        name += *itext;

    return !name.empty();
}
/*
    reading a string in double quotes (leading white characters are skipped)

    string can have a quote character (escaped with a backslash)
    e.g. "sample text \"with quotes\""
    use \\ to insert one backslash

    returns false if there is no opening quote or the closing quote is missing,
    in the latter case 'str' has the characters read till the end of the input
*/
bool PatternParser::ReadString(std::wstring & str)
{
    str.clear();
    SkipWhite();

    // string is signed by its first quote character (")
    if( *itext != '\"' )
        return false;

    ++itext;

    while( *itext != 0 && *itext != '\"' )
    {
        const bool is_escape = ( itext[0] == '\\' &&
                                (itext[1] == '\"' || itext[1] == '\\') );

        // dropping the backslash, the escaped character is copied below
        if( is_escape )
            ++itext;

        str += *itext;
        ++itext;
    }

    // the end of the input has been reached without a closing quote
    if( *itext != '\"' )
        return false;

    ++itext;

    return true;
}
/*
    reads a nested function with its own parameters: [name param1 param2 ...]
    and adds it as a new parameter of 'function'

    the reading pointer must be at the opening '['

    on an error the rest of the statement is skipped (SkipOneStatement)
    and false is returned
*/
bool PatternParser::ReadNestedFunction(Item::Function & function)
{
    ++itext; // skipping '['

    if( ReadFunction(function.AddNewParam(), true) )
    {
        SkipWhite();

        if( *itext == ']' )
        {
            ++itext;
            return true;
        }
    }

    // either the function was incorrect or the closing ']' is missing
    SkipOneStatement();

    return false;
}
/*
    reads a string parameter ("...") and adds it as a new parameter
    of 'function', the text is stored in the name of the parameter
    and is_function is set to false

    returns the result of ReadString()
*/
bool PatternParser::ReadParamString(Item::Function & function)
{
    Item::Function & param = function.AddNewParam();
    param.is_function = false;

    const bool ok = ReadString(param.name);

    return ok;
}
/*
    reads parameters of a function until ']' or the end of the input
    (the ']' is not consumed)

    a parameter can be:
     - a nested function in brackets: [name ...]
     - a string in double quotes
     - a single name (a function without parameters)

    returns true if it correctly reads all parameters
*/
bool PatternParser::ReadParams(Item::Function & function)
{
    for(;;)
    {
        bool ok;

        SkipWhite();

        if( *itext == '[' )
        {
            ok = ReadNestedFunction(function);
        }
        else
        if( *itext == '\"' )
        {
            ok = ReadParamString(function);
        }
        else
        if( *itext != ']' && *itext != 0 )
        {
            ok = ReadFunction(function.AddNewParam(), false);
        }
        else
        {
            // *itext is equal to ']' or we are at the end of the input
            return true;
        }

        if( !ok )
            return false;
    }
}
/*
    reads a function: name[:postfix] and (if with_params is true) its parameters

    if function_name is not null then the name is taken from it
    and is not read from the input

    returns false if the name cannot be read,
    otherwise returns true if it correctly reads all parameters
*/
bool PatternParser::ReadFunction(Item::Function & function, bool with_params, const std::wstring * function_name)
{
    SkipWhite();

    function.Clear();
    function.is_function = true;

    if( function_name != 0 )
    {
        function.name = *function_name;
    }
    else
    {
        if( !ReadName(function.name) )
            return false;
    }

    if( *itext == ':' )
    {
        ++itext;
        ReadName(function.postfix); // we allow the postfix to be empty
    }

    CheckFunctionIsNumber(function);

    return !with_params || ReadParams(function);
}
/*
    reads the function of a directive (with parameters) into item.function

    if the directive ends immediately (']') then the item has no function
    and true is returned

    on an error item.function is cleared, the item gets the item_err type
    and false is returned
*/
bool PatternParser::ReadFunction(Item & item)
{
    SkipWhite();

    item.has_function = ( *itext != ']' );

    if( !item.has_function )
        return true;

    if( ReadFunction(item.function, true) )
        return true;

    item.function.Clear();
    item.type = Item::item_err;

    return false;
}
/*
    checks whether a directive is correctly closed with ']'

    if so the bracket is consumed, otherwise the item gets the item_err type,
    its function is cleared and the rest of the statement is skipped
*/
void PatternParser::CreateTreeReadItemDirectiveCheckEnding(Item & item)
{
    SkipWhite();

    if( *itext == ']' )
    {
        ++itext;
        return;
    }

    item.type = Item::item_err;
    item.function.Clear();
    SkipOneStatement();
}
/*
    user defined directive: [name param1 param2 ...]

    'name' has been already read by the caller,
    on an error the item gets the item_err type and its function is cleared
*/
void PatternParser::ReadDirectiveNormal(const std::wstring & name, Item & item)
{
    item.type         = Item::item_function;
    item.has_function = true;

    const bool ok = ReadFunction(item.function, true, &name);

    if( !ok )
    {
        item.type = Item::item_err;
        item.function.Clear();
    }
}
/*
    [if ...] directive: the item type is item_if,
    the condition is read by ReadFunction(item)
    (which changes the type to item_err on an error)
*/
void PatternParser::ReadDirectiveIf(Item & item)
{
    item.type = Item::item_if;

    ReadFunction(item);
}
/*
    [end] directive: only the item type is set, nothing is read from the input
*/
void PatternParser::ReadDirectiveEnd(Item & item)
{
    Item & end_item = item;
    end_item.type = Item::item_end;
}
/*
    [else] directive: only the item type is set, nothing is read from the input
*/
void PatternParser::ReadDirectiveElse(Item & item)
{
    Item & else_item = item;
    else_item.type = Item::item_else;
}
/*
    [for ...] directive: the item type is item_for,
    the function is read by ReadFunction(item)
    (which changes the type to item_err on an error)
*/
void PatternParser::ReadDirectiveFor(Item & item)
{
    item.type = Item::item_for;

    ReadFunction(item);
}
/*
    [# ...] directive (a comment): the item type is item_comment

    everything up to the nearest ']' (or the end of the input) is skipped,
    the bracket itself is not consumed here
*/
void PatternParser::ReadDirectiveComment(Item & item)
{
    item.type = Item::item_comment;

    // skipping the comment
    for( ; *itext != 0 && *itext != ']' ; ++itext )
    {
    }
}
/*
    [include "file_name"] directive

    the file name (a string in double quotes) is read into item.file_name,
    if it cannot be read then the item gets the item_err type
*/
void PatternParser::ReadDirectiveInclude(Item & item)
{
    const bool has_name = ReadString(item.file_name);

    if( !has_name )
    {
        item.type = Item::item_err;
        return;
    }

    item.type = Item::item_include;
}
/*
    [def ...] directive: the item type is item_def,
    the function is read by ReadFunction(item)
    (which changes the type to item_err on an error)
*/
void PatternParser::ReadDirectiveDef(Item & item)
{
    item.type = Item::item_def;

    ReadFunction(item);
}
/*
    [filter ...] directive: the item type is item_filter,
    the function is read by ReadFunction(item)
    (which changes the type to item_err on an error)
*/
void PatternParser::ReadDirectiveFilter(Item & item)
{
    item.type = Item::item_filter;

    ReadFunction(item);
}
/*
    [ezc ...] directive: the item type is item_ezc,
    the function is read by ReadFunction(item)
    (which changes the type to item_err on an error)
*/
void PatternParser::ReadDirectiveEzc(Item & item)
{
    item.type = Item::item_ezc;

    ReadFunction(item);
}
/*
    [out] is a shorthand for [ezc out]

    the item gets the item_ezc type and a function called "out",
    only parameters are read from the input

    on an error the item gets the item_err type and its function is cleared
*/
void PatternParser::ReadDirectiveOut(Item & item)
{
    item.type         = Item::item_ezc;
    item.has_function = true;

    Item::Function & out_function = item.function;

    out_function.Clear();
    out_function.name        = L"out";
    out_function.is_function = true;

    if( ReadParams(out_function) )
        return;

    item.type = Item::item_err;
    out_function.Clear();
}
/*
    [block name] directive: the item type is item_block,
    the block name is read by ReadFunction(item)

    only one function without arguments is allowed,
    if there are parameters then the item gets the item_err type
*/
void PatternParser::ReadDirectiveBlock(Item & item)
{
    item.type = Item::item_block;
    ReadFunction(item);

    const bool has_parameters = !item.function.parameters.empty();

    // only one function without arguments
    if( has_parameters )
    {
        item.type = Item::item_err;
    }
}
/*
    [return ...] directive: the item type is item_return,
    the function is read by ReadFunction(item)
    (which changes the type to item_err on an error)
*/
void PatternParser::ReadDirectiveReturn(Item & item)
{
    item.type = Item::item_return;

    ReadFunction(item);
}
/*
    reads one directive: [name ...]

    the reading pointer must be at the opening '['

    the name decides which reader is called, a name which is not a built-in
    directive is treated as a user defined function (ReadDirectiveNormal),
    if there is no name at all then no reader is called

    at the end the closing ']' is checked
    (CreateTreeReadItemDirectiveCheckEnding)
*/
void PatternParser::CreateTreeReadItemDirective(Item & item)
{
    std::wstring directive;

    ++itext; // skipping '['
    SkipWhite();
    ReadName(directive);

    if( directive == L"if" )
    {
        ReadDirectiveIf(item);
    }
    else if( directive == L"end" )
    {
        ReadDirectiveEnd(item);
    }
    else if( directive == L"else" )
    {
        ReadDirectiveElse(item);
    }
    else if( directive == L"for" )
    {
        ReadDirectiveFor(item);
    }
    else if( directive == L"include" )
    {
        ReadDirectiveInclude(item);
    }
    else if( directive == L"def" )
    {
        ReadDirectiveDef(item);
    }
    else if( directive == L"filter" )
    {
        ReadDirectiveFilter(item);
    }
    else if( directive == L"ezc" )
    {
        ReadDirectiveEzc(item);
    }
    else if( directive == L"out" )
    {
        ReadDirectiveOut(item);
    }
    else if( directive == L"block" )
    {
        ReadDirectiveBlock(item);
    }
    else if( directive == L"return" )
    {
        ReadDirectiveReturn(item);
    }
    else if( directive == L"#" )
    {
        ReadDirectiveComment(item);
    }
    else if( !directive.empty() )
    {
        ReadDirectiveNormal(directive, item);
    }

    CreateTreeReadItemDirectiveCheckEnding(item);
}
/*
    reads a plain text item: all characters up to the nearest directive
    or the end of the input are appended to item.text (see ReadCharInText)

    if delete_white_text_items is set then a text consisting only
    of white characters is emptied, the item type is item_text
*/
void PatternParser::CreateTreeReadItemText(Item & item)
{
    for( int c = ReadCharInText() ; c != -1 ; c = ReadCharInText() )
    {
        item.text += static_cast<wchar_t>(c);
    }

    if( delete_white_text_items )
    {
        CheckWhiteAndDelete(item.text);
    }

    item.type = Item::item_text;
}
/*
    reads one item (a directive or a plain text) into 'item',
    the item is cleared first

    returns false at the end of the input (nothing has been read)
*/
bool PatternParser::CreateTreeReadItem(Item & item)
{
    item.Clear();

    // the end of the string
    if( *itext == 0 )
        return false;

    if( *itext == '[' )
        CreateTreeReadItemDirective(item);
    else
        CreateTreeReadItemText(item);

    return true;
}
/*
    reads the file of an [include] directive
    but only if including is allowed (see AllowInclude)
*/
void PatternParser::CreateTreeReadInclude(Item & item)
{
    if( allow_include )
    {
        CreateTreeReadIncludeSkipAllowFlag(item);
    }
}
/*
    reads the file item.file_name and builds its tree in 'item',
    the allow_include flag is not checked here

    nothing is done if the file name is empty or if include_level
    is greater than include_level_max

    the reading pointer is switched to the content of the file
    for the time of parsing and restored afterwards
*/
void PatternParser::CreateTreeReadIncludeSkipAllowFlag(Item & item)
{
    if( item.file_name.empty() )
        return;

    if( include_level > include_level_max )
    {
        #ifdef EZC_USE_WINIX_LOGGER
        Winix::log << Winix::log1 << "Ezc: \"include\" directive has reached the maximum level" << Winix::logend;
        #endif

        return;
    }

    ++include_level;

    // this temporary object must not be global (includes can be nested)
    std::wstring content;
    ReadFile(item.file_name, content);

    // parsing the file and going back to the place we have come from
    const wchar_t * const saved_itext = itext;

    itext = content.c_str();
    CreateTree(item);
    itext = saved_itext;

    --include_level;
}
/*
    reads the body of an [if] directive

    the first child of 'item' is the tree read up to [else] or [end],
    if it has finished with [else] then a second child is added
    for the else-branch

    the terminating [else] / [end] item is removed from the branch
*/
void PatternParser::CreateTreeReadIf(Item & item)
{
    Item * branch = item.AddItem();
    CreateTree(*branch);

    if( branch->LastItemType() == Item::item_else )
    {
        branch->DeleteLastItem();

        // now 'branch' points to the else-branch
        branch = item.AddItem();
        CreateTree(*branch);
    }

    if( branch->LastItemType() == Item::item_end )
    {
        branch->DeleteLastItem();
    }
}
/*
    reads the body of a [block] directive into a temporary tree
    (the terminating [end] item is removed from it)

    if a Blocks container is attached (see SetBlocks) then the tree
    is inserted there under the name of the function of 'item',
    otherwise the body is parsed and dropped
*/
void PatternParser::CreateTreeReadBlock(Item & item)
{
    Item body;
    CreateTree(body);

    if( body.LastItemType() == Item::item_end )
    {
        body.DeleteLastItem();
    }

    //if( pblocks && item.functions.size()==1 )
    if( pblocks != 0 )
    {
        pblocks->Insert(item.function.name, body);
    }
}
/*
    reads the body of a directive closed with [end]
    (CreateTree uses it for [for], [filter] and [ezc])

    the body is the only child added to 'item',
    the terminating [end] item is removed from it
*/
void PatternParser::CreateTreeReadFor(Item & item)
{
    Item * body = item.AddItem();
    CreateTree(*body);

    const bool closed_with_end = ( body->LastItemType() == Item::item_end );

    if( closed_with_end )
    {
        body->DeleteLastItem();
    }
}
/*
    builds the tree of items in 'item' reading from the current position (itext)

    'item' is cleared and becomes a container, then items are read one by one
    and added as its children until:
     - the end of the input: the empty child added for the next read is removed
     - an [end] or [else] item: it stays as the last child so the caller
       can test it with LastItemType() (see CreateTreeReadIf, CreateTreeReadFor)

    comments and block definitions do not stay in the tree:
    the same child is read again (CreateTreeReadItem clears it),
    the body of a block is parsed by CreateTreeReadBlock
*/
void PatternParser::CreateTree(Item & item)
{
item.Clear();
item.type = Item::item_container;
while( true )
{
// a new child for the next item
Item * pitem = item.AddItem();
do
{
if( !CreateTreeReadItem(*pitem) )
{
// the end of the input, the child has not been used
item.DeleteLastItem();
return;
}
if( pitem->type == Item::item_block )
CreateTreeReadBlock(*pitem);
}
// reading again into the same child if it was a comment or a block
while( pitem->type == Item::item_comment ||
pitem->type == Item::item_block );
// the end of this level, the caller looks at the last item
if( pitem->type == Item::item_end || pitem->type == Item::item_else )
return;
// directives with a body: the body is read recursively as children of the item
if( pitem->type == Item::item_if )
CreateTreeReadIf(*pitem);
if( pitem->type == Item::item_for ||
pitem->type == Item::item_filter ||
pitem->type == Item::item_ezc )
CreateTreeReadFor(*pitem);
// the included file is parsed into the item (only if including is allowed)
if( pitem->type == Item::item_include )
CreateTreeReadInclude(*pitem);
}
}
} // namespace Ezc