/*
* Copyright (C) 2020 Weber Yann
*
* This file is part of pyrpn.
*
* pyrpn is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* any later version.
*
* pyrpn is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with pyrpn. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef __rpn_parse__h__
#define __rpn_parse__h__
#include
#include
#include
#include
#include "config.h"
#include "rpn_lib.h"
/**@file rpn_parse.h
* @brief RPN expression parsing headers
* @ingroup rpn_tokenize
*
* Contains headers of @ref rpn_tokenize and @ref rpn_parse .
*/
/**@defgroup rpn_tokenize Expression tokenization
* @brief Parsing an expression into a @ref rpn_tokenized_t
*
* The tokenized form ( see @ref rpn_tokenized_t ) of an expression is useful
* for @ref mutation.
*
* The tokenizing process is done in a way allowing compilation process to
* fetch tokens while parsing the expression (see @ref rpn_tok).
* @ingroup rpn_compile
*/
/**@defgroup rpn_parse Token parsing functions
* @brief Internal parsing functions
* @ingroup rpn_tokenize
*/
/**@brief Shortcut for loop on all operations list
* @param IDX An integer lvalue used as loop index ; it is assigned 0 then
* incremented by the macro until it reaches @ref rpn_op_sz()
* @note @ref rpn_op_sz() is called again at each iteration
* @note NOTE(review): in the reviewed copy this line was truncated (everything
* between the '<' of the loop condition and a later '->' was lost). The loop
* was rebuilt from @ref rpn_op_sz ; a second macro reading the err_reason
* field of a tokenizer seems to have been swallowed too and has to be
* restored from the upstream file.
*/
#define foreach_rpn_ops(IDX) for((IDX)=0; (IDX)<rpn_op_sz(); (IDX)++)
/**@brief Shortcut for struct @ref rpn_op_s */
typedef struct rpn_op_s rpn_op_t;
/**@brief Shortcut for enum @ref rpn_token_type_e
* @note NOTE(review): the enum is named here before its enumerator list is
* declared (further down in this header). ISO C does not allow referring to
* an incomplete enum type, compilers accept it as an extension : consider
* moving this typedef after the enum definition.
*/
typedef enum rpn_token_type_e rpn_token_type_t;
/**@brief Shortcut for struct @ref rpn_token_s */
typedef struct rpn_token_s rpn_token_t;
/**@brief Shortcut for struct @ref rpn_tokenized_s */
typedef struct rpn_tokenized_s rpn_tokenized_t;
/**@brief Shortcut for struct @ref rpn_tokenizer_s */
typedef struct rpn_tokenizer_s rpn_tokenizer_t;
/**@brief Handles operation identification informations storage
*
* Element type of the @ref rpn_ops list : groups the short (character) and
* long (string) representation of an operation with a pointer on its code
* and a pointer on its code size.
* @ingroup rpn_tokenize
*/
struct rpn_op_s
{
/**@brief Pointer on function pointer */
const void **fun;
/**@brief Pointer on the function code size */
const unsigned long *fun_sz;
/**@brief Character representing operation ('\0' if None) */
char chr;
/**@brief String representing operation */
char *str;
};
/**@brief Defines @ref rpn_token_s types
*
* Stored in @ref rpn_token_s::type , the token data has to be read according
* to this value.
* @ingroup rpn_tokenize */
enum rpn_token_type_e {
/**@brief The token is an operation (see @ref rpn_lang_op ) */
RPN_op,
/**@brief The token is an argument (see @ref rpn_lang_arg ) */
RPN_arg,
/**@brief The token is a constant value (see @ref rpn_lang_value ) */
RPN_val
};
/**@brief Represent an expression token (value, argument or operation)
*
* The @ref type field indicates which member of the anonymous union holds
* the token data.
* @ingroup rpn_tokenize */
struct rpn_token_s
{
/**@brief Token type */
rpn_token_type_t type;
/**@brief Token data depending on @ref type */
union {
/**@brief Token data for @ref RPN_op tokens */
struct {
/**@brief Indicate the operation index in @ref rpn_ops */
unsigned char op_n;
/**@brief Pointer on operation informations */
const rpn_op_t *op;
};
/**@brief Indicate the argument number (token data for @ref RPN_arg
* tokens) */
unsigned long int arg_n;
/**@brief Indicate the constant value (token data for @ref RPN_val
* tokens) */
unsigned long int value;
};
};
//} __attribute__((aligned));
/**@brief Represent a tokenized expression
*
* A list of @ref rpn_token_s with its size, and the number of arguments
* expected by the expression (argc).
* @ingroup rpn_tokenize */
struct rpn_tokenized_s
{
/**@brief Number of expected arguments */
size_t argc;
/**@brief The number of tokens in the expression */
size_t tokens_sz;
/**@brief List of tokens ( @ref tokens_sz elements) */
rpn_token_t *tokens;
};
/**@brief Handles data while tokenizing
*
* Stores the tokenization state, allowing to return new tokens as soon as
* they become ready (see @ref rpn_tok ).
* @ingroup rpn_tokenize */
struct rpn_tokenizer_s
{
/**@brief Source expression */
const char *orig;
/**@brief Expression work buffer */
char *buff;
/**@brief Current expression buffer */
char *cur;
/**@brief Current chr number (for error generation & debugging) */
size_t chr_no;
/**@brief The tokenized representation of the expression
* @note Should point on @ref rpn_expr_t::toks */
rpn_tokenized_t *toks;
/**@brief The number of allocated rpn_token_t in toks */
size_t allocated_toks;
/**@brief Tokenization error reason (64 bytes buffer, same size as the
* error buffer given to @ref rpn_tokenize ) */
char err_reason[64];
};
/**@brief Define all operations
*
* Stores operation identification informations (one @ref rpn_op_s per
* operation). The array is only declared here, its number of elements is
* given by @ref rpn_op_sz() .
* @ingroup rpn_tokenize */
extern const rpn_op_t rpn_ops[];
/**@brief Initialize a tokenizer and a tokenized representation
* @param tokenizer Pointer on a new tokenizer
* @param dst Pointer on a tokenized struct to store generated tokens
* @param expr Pointer on the RPN expression to tokenize
* @param argc Number of arguments accepted by the expression
* @return 0 if no error else -1 and set @ref rpn_tokenizer_s::err_reason
* @warning The arguments are not checked against NULL for the moment...
* @ingroup rpn_tokenize
*/
int rpn_tokenizer_start(rpn_tokenizer_t *tokenizer, rpn_tokenized_t *dst,
const char* expr, size_t argc);
/**@brief Return the next token
* @param tokenizer Pointer on tokenizing task informations
* @return A pointer on the next @ref rpn_token_s in
* @ref rpn_tokenizer_s::toks or NULL if end of expression or error
* @note When NULL is returned all resources are freed, no need to
* call @ref rpn_tokenizer_free
* @ingroup rpn_tokenize
*/
rpn_token_t* rpn_tok(rpn_tokenizer_t *tokenizer);
/**@brief Free resources of a tokenizer
* @param tokenizer Pointer on the tokenizer we want to deallocate
* @note This function must be used to abort a tokenizing process when
* neither an error nor the end of expression was encountered (in these
* two cases @ref rpn_tok already freed the resources)
* @ingroup rpn_tokenize
*/
void rpn_tokenizer_free(rpn_tokenizer_t *tokenizer);
/**@brief Tokenize a '\0' terminated string
* @param token A '\0' terminated string
* @param dst Pointer on information destination
* @param error Pointer on an error reason buffer (64 bytes)
* @return 0 if dst set and token recognized else -1 and set error buffer
* @warning The token is expected to be non-empty
* @ingroup rpn_tokenize
*/
int rpn_tokenize(const char *token, rpn_token_t *dst, char error[64]);
/**@brief Represent a tokenized expression in a string
* @param tokens Tokenized expression
* @param long_op If true uses @ref rpn_op_s::str else @ref rpn_op_s::chr
* to represent operations
* @return A newly allocated char* that should be deallocated using free()
* @ingroup rpn_tokenize
*/
char* rpn_tokenized_expr(rpn_tokenized_t *tokens, char long_op);
/**@brief Return NULL or a pointer on corresponding operation informations
* @param token The token we want to match
* @return NULL or a pointer on the operation informations
* @ingroup rpn_parse
*/
const rpn_op_t* rpn_match_token(const char* token);
/**@brief Return -1 or an index corresponding to @ref rpn_ops
* @param token The token we want to match
* @return -1 or the index of the operation in @ref rpn_ops
* @ingroup rpn_parse
*/
int rpn_match_token_i(const char* token);
/**@brief Get an integer from a token
* @param token The token to decode
* @param result A pointer on the result
* @return -1 if given token is not a decimal number else 0 is returned
* and result is set
* @note NOTE(review): @ref rpn_lang_value also documents hexadecimal, octal
* and binary constants while this comment only mentions decimal numbers ;
* to be checked against the implementation.
* @ingroup rpn_parse
*/
int rpn_match_number(const char* token, unsigned long *result);
/**@brief Get operations list size
*
* Takes no argument : the prototype is declared with (void) so that calls
* given arguments by mistake are rejected at compile time.
* @return number of operations in @ref rpn_ops
*/
size_t rpn_op_sz(void);
/**@page rpn_lang RPN expression syntax
* @brief Howto write an expression
*
* \section rpn_lang_syntax General syntax
* An expression is composed of tokens separated by 1 or multiple separation
* characters (space, newline or tabs).
*
* There are 3 types of token (see @ref rpn_token_type_e ) : @ref rpn_lang_op ,
* @ref rpn_lang_arg and @ref rpn_lang_value .
*
* \section rpn_lang_tokens RPN tokens
* \subsection rpn_lang_arg Arguments
* Expression can be parametric : arguments are given at evaluation and
* replaced in expression.
*
* In RPN expressions arguments are designated by a number (starting from 0)
* and prefixed by 'A' char.
*
* For example an expression evaluating to the sum of their two arguments
* will be written : "A0 A1 +"
*
* \subsection rpn_lang_value Constant values
* Constant values can be expressed in different bases (the Python syntax) :
* - 42
* - 0x2a or 0x2A
* - 0o52
* - 0b101010
*
* \subsection rpn_lang_op Operations
* Operations have two forms : a short (1 character long) and a long (a string).
*
* All valid operations are declared in @ref rpn_ops variable (see
* @ref rpn_parse.c for details).
*
* The @ref python_module expose a function pyrpn.get_ops() ( @see pyrpn_ops )
* returning a dict with long operations as key and short as value.
* \subsubsection rpn_lan_op_internal Internal mechanism
* Operations are done using a loopstack : operands are popped from the stack,
* and the result is pushed onto it.
*
* Operation implementations are written in x86_64 linux assembly code ( see
* @ref rpn_lib.asm ). Each operation has a corresponding exported symbol
* (declared in @ref rpn_lib.h ) pointing on the compiled code. This code will be
* copied in a memory map in order to compile (@ref rpn_cmap ) an evaluation
* function.
*/
/**@page rpn_lang_ext Language extension
* @brief Howto add new operations
*
* @section rpn_lang_ext_op Add an operation
*
* In order to add a new operation you have to do three things :
*
* @subsection rpn_lang_ext_asm Write the operation code
*
* You have to write the operation code in @ref rpn_lib.asm and to expose
* the corresponding symbols (the code label and the code portion size).
*
* The macro part_sz allows to do most of the work by :
* - defining a symbol NAME_sz pointing on the code portion size
* - export the symbols NAME and NAME_sz
*
* @warning The part_sz macro HAS TO be placed at the end of the corresponding
* code_portion
*
* @subsection rpn_lang_ext_head Import the symbols in C headers
*
* The @ref rpn_lib.h header is designed to contain all extern assembly symbols
* (pointer on compiled code and on size).
*
* To add a new operation you have to "import" the symbols defined in
* @ref rpn_lib.asm using the @ref CODE_PART macro
*
* @subsection rpn_lang_ext_code Declare corresponding short and long tokens
*
* The @ref rpn_compile will match short or long operations and corresponding
* pre-compiled code.
*
* The association between short (char), long (char*) and pre-compiled code is
* done in the @ref rpn_ops variable.
* @note The __op macro allows simple operation declaration.
*/
#endif