347 lines
11 KiB
C
347 lines
11 KiB
C
/*
|
|
* Copyright (C) 2020 Weber Yann
|
|
*
|
|
* This file is part of pyrpn.
|
|
*
|
|
* pyrpn is free software: you can redistribute it and/or modify
|
|
* it under the terms of the GNU General Public License as published by
|
|
* the Free Software Foundation, either version 3 of the License, or
|
|
* any later version.
|
|
*
|
|
* pyrpn is distributed in the hope that it will be useful,
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
* GNU General Public License for more details.
|
|
*
|
|
* You should have received a copy of the GNU General Public License
|
|
* along with pyrpn. If not, see <http://www.gnu.org/licenses/>.
|
|
*/
|
|
#ifndef __rpn_parse__h__
|
|
#define __rpn_parse__h__
|
|
|
|
#include <stdlib.h>
|
|
#include <stdio.h>
|
|
#include <string.h>
|
|
#include <errno.h>
|
|
|
|
#include "config.h"
|
|
#include "rpn_lib.h"
|
|
|
|
/**@file rpn_parse.h
|
|
* @brief RPN expression parsing headers
|
|
* @ingroup rpn_tokenize
|
|
*
|
|
* Contains headers of @ref rpn_tokenize and @ref rpn_parse .
|
|
*/
|
|
/**@defgroup rpn_tokenize Expression tokenization
|
|
* @brief Parsing an expression into a @ref rpn_tokenized_t
|
|
*
|
|
* The tokenized form ( see @ref rpn_tokenized_t ) of an expression is useful
* for mutations (see @ref rpn_mutate.h ).
|
|
*
|
|
* The tokenizing process is done in a way allowing compilation process to
|
|
* fetch tokens while parsing the expression (see @ref rpn_tok).
|
|
* @ingroup rpn_compile
|
|
*/
|
|
/**@defgroup rpn_parse Token parsing functions
|
|
* @brief Internal parsing functions
|
|
* @ingroup rpn_tokenize
|
|
*/
|
|
|
|
/**@brief Shortcut for a loop over the whole operations list
 * @param IDX A modifiable integer lvalue used as loop counter; it goes from 0
 * to @ref RPN_OP_SZ - 1 and equals @ref RPN_OP_SZ when the loop ends
 * @note IDX is parenthesized in the expansion so that expressions such as
 * *ptr are incremented as a whole (and not parsed as *(ptr++))
 */
#define foreach_rpn_ops(IDX) for((IDX)=0; (IDX)<RPN_OP_SZ; (IDX)++)
|
|
|
|
/**@brief Check if a tokenizer is in error state
 * @param tokenizer Pointer on a @ref rpn_tokenizer_s
 * @return false if no error else true
 * @note Expands to the first chr of @ref rpn_tokenizer_s::err_reason : the
 * result is '\0' (false) when no error reason is stored, non-zero otherwise
 */
#define rpn_tokenizer_error(tokenizer) ((tokenizer)->err_reason[0])
|
|
|
|
/**@brief Shortcut for struct @ref rpn_op_s */
typedef struct rpn_op_s rpn_op_t;
/**@brief Shortcut for struct @ref rpn_token_s */
typedef struct rpn_token_s rpn_token_t;
/**@brief Shortcut for struct @ref rpn_tokenized_s */
typedef struct rpn_tokenized_s rpn_tokenized_t;
/**@brief Shortcut for struct @ref rpn_tokenizer_s */
typedef struct rpn_tokenizer_s rpn_tokenizer_t;

/**@brief Handles operation identification informations storage
 * @ingroup rpn_tokenize
 */
struct rpn_op_s
{
	/**@brief Pointer on function pointer */
	const void **fun;
	/**@brief Pointer on the function code size */
	const unsigned long *fun_sz;
	/**@brief Character representing operation ('\0' if None)*/
	char chr;
	/**@brief String representing operation */
	char *str;
};

/**@brief Defines @ref rpn_token_s types
 * @ingroup rpn_tokenize */
enum rpn_token_type_e {
	/**@brief The token is an operation */
	RPN_op,
	/**@brief The token is an argument */
	RPN_arg,
	/**@brief The token is a value */
	RPN_val,
};

/**@brief Shortcut for enum @ref rpn_token_type_e
 * @note Declared after the enum definition : ISO C does not allow to refer
 * to an enum type before its enumerator list has been seen */
typedef enum rpn_token_type_e rpn_token_type_t;
|
|
|
|
/**@brief Represent an expression token (value, argument or operation)
|
|
* @ingroup rpn_tokenize */
|
|
struct rpn_token_s
|
|
{
|
|
/**@brief Token type */
|
|
rpn_token_type_t type;
|
|
|
|
/**@brief Token data depending on @ref type */
|
|
union {
|
|
/**@brief Token data for @ref RPN_op tokens */
|
|
struct {
|
|
/**@brief Indicate the operation index in @ref rpn_ops */
|
|
unsigned char op_n;
|
|
/**@brief Pointer on operation informations */
|
|
const rpn_op_t *op;
|
|
};
|
|
/**@brief Indicate the argument number */
|
|
unsigned long int arg_n;
|
|
/**@brief Indicate the constant value */
|
|
unsigned long int value;
|
|
};
|
|
};
|
|
//} __attribute__((aligned));
|
|
|
|
/**@brief Represent a tokenized expression
|
|
*
|
|
* A list of @ref rpn_token_s and argc
|
|
* @ingroup rpn_tokenize */
|
|
struct rpn_tokenized_s
|
|
{
|
|
/**@brief Number of expected arguments */
|
|
size_t argc;
|
|
/**@brief The number of token in the expression */
|
|
size_t tokens_sz;
|
|
/**@brief List of tokens */
|
|
rpn_token_t *tokens;
|
|
};
|
|
|
|
/**@brief Handles data will tokenizing
|
|
*
|
|
* Store compilation state, allowing to return new token as soon as they
|
|
* become ready.
|
|
* @ingroup rpn_tokenize */
|
|
struct rpn_tokenizer_s
|
|
{
|
|
/**@brief Source expression */
|
|
const char *orig;
|
|
/**@brief Expression work buffer */
|
|
char *buff;
|
|
/**@brief Current expression buffer */
|
|
char *cur;
|
|
/**@brief Current chr number (for error generation & debugging) */
|
|
size_t chr_no;
|
|
|
|
/**@brief The tokenized representation of the expression
|
|
* @note Should point on @ref rpn_expr_t::toks */
|
|
rpn_tokenized_t *toks;
|
|
/**@brief The number of allocated rpn_token_t in toks */
|
|
size_t allocated_toks;
|
|
|
|
/**@brief Tokenization error */
|
|
char err_reason[64];
|
|
};
|
|
|
|
/**@brief Define all operations
|
|
*
|
|
* Stores operation identification informations
|
|
* @ingroup rpn_tokenize */
|
|
extern const rpn_op_t rpn_ops[];
|
|
|
|
/**@brief The count of operations (the size of @ref rpn_ops array) */
extern const size_t RPN_OP_SZ;
|
|
|
|
|
|
/**@brief Initialize a tokenizer and a tokenized representation
|
|
* @param tokenizer Pointer on a new tokenizer
|
|
* @param dst Pointer on a tokenized struct to store generated tokens
|
|
* @param expr Pointer on the RPN expression to tokenize
|
|
* @param argc Number of argument accepted by expression
|
|
* @return 0 if no error else -1 and set @ref rpn_tokenizer_s::err_reason
|
|
* @warning no NULL checks for the moment...
|
|
* @ingroup rpn_tokenize
|
|
*/
|
|
int rpn_tokenizer_start(rpn_tokenizer_t *tokenizer, rpn_tokenized_t *dst,
|
|
const char* expr, size_t argc);
|
|
|
|
/**@brief Return the next token
|
|
* @param tokenizer Pointer on tokenizing task informations
|
|
* @return The a pointer on next @ref rpn_token_s in @ref rpn_tokenizer_s::toks
|
|
* or NULL if end of expression or error
|
|
* @note When NULL is returned all ressources are freed, no need to
|
|
* call @ref rpn_tokenizer_free
|
|
* @ingroup rpn_tokenize
|
|
*/
|
|
rpn_token_t* rpn_tok(rpn_tokenizer_t *tokenizer);
|
|
|
|
/**@brief Free ressources of a tokenizer
|
|
* @param tokenizer Pointer on the tokenizer we want to deallocate
|
|
* @note This method must be used to abord a tokenizing process with no
|
|
* error or end of expression encountered
|
|
* @ingroup rpn_tokenize
|
|
*/
|
|
void rpn_tokenizer_free(rpn_tokenizer_t *tokenizer);
|
|
|
|
/**@brief Tokenize a '\0' terminated string
|
|
* @param token A '\0' terminated string
|
|
* @param dst Pointer on information destination
|
|
* @param error Pointer on an error reason buffer
|
|
* @return 0 if dst set and token recognized else -1 and set error buffer
|
|
* @warning assert token is not empty
|
|
* @ingroup rpn_tokenize
|
|
*/
|
|
int rpn_tokenize(const char *token, rpn_token_t *dst, char error[64]);
|
|
|
|
/**@brief Represent a tokenized expression in a string
|
|
* @param tokens Tokenized expression
|
|
* @param long_op If true uses @ref rpn_op_s::str else @ref rpn_op_s::chr
|
|
* @return A newly allocated char* that should be deallocated using free()
|
|
* @ingroup rpn_tokenize
|
|
*/
|
|
char* rpn_tokenized_expr(const rpn_tokenized_t *tokens, char long_op);
|
|
|
|
/**@brief Returns NULL or a pointer on corresponding operation infos
|
|
* @param token The token we want to match
|
|
* @return NULL or operation informations
|
|
* @ingroup rpn_parse
|
|
*/
|
|
const rpn_op_t* rpn_match_token(const char* token);
|
|
|
|
/**@brief Returns NULL or pointer on corresponding operation infos
|
|
* @param opcode (index in @ref rpn_ops )
|
|
* @return NULL or operation informations
|
|
*/
|
|
const rpn_op_t* rpn_op_from_opcode(unsigned char opcode);
|
|
|
|
/**@brief Return -1 or an index corresponding to @ref rpn_ops
 * @param token The token we want to match
 * @return -1 or the index of the matched operation in @ref rpn_ops
 * @ingroup rpn_parse
 */
int rpn_match_token_i(const char* token);
|
|
|
|
/**@brief Get an integer from a token
 * @param token The token to decode
 * @param result A pointer on the result
 * @return -1 if given token is not a decimal number else 0 is returned
 * and result is set
 * @ingroup rpn_parse
 */
int rpn_match_number(const char* token, unsigned long *result);
|
|
|
|
/**@brief Stores a token string representation in given buffer
|
|
* @param token The token to represent
|
|
* @param dst The destination buffer for the string
|
|
* @param sz The biffer size
|
|
* @return Same as snprintf (the number of chr stored or negative value on error
|
|
*/
|
|
int rpn_token_snprintf(rpn_token_t *token, char *dst, size_t sz);
|
|
|
|
/**@brief Get operations list size
 * @return number of operations in @ref rpn_ops
 * @note Declared with an explicit (void) parameter list so that the
 * declaration is a real prototype and calls with arguments are rejected
 */
size_t rpn_op_sz(void);
|
|
/**@brief Macro version of @ref rpn_op_sz()
 * @warning sizeof needs a complete array type : this header only declares
 * @ref rpn_ops as an array of unknown size, so this macro can only be
 * expanded where the definition of the array is visible
 */
#define RPN_OPS_SZ (sizeof(rpn_ops)/sizeof(rpn_ops[0]))
|
|
|
|
/**@page rpn_lang RPN expression syntax
|
|
* @brief Howto write an expression
|
|
*
|
|
* \section rpn_lang_syntax General syntax
|
|
* An expression is composed of tokens separated by 1 or multiple separation
|
|
* characters (space, newline or tabs).
|
|
*
|
|
* There are 3 types of token (see @ref rpn_token_type_e ) : @ref rpn_lang_op ,
* @ref rpn_lang_arg and @ref rpn_lang_value .
|
|
*
|
|
* \section rpn_lang_tokens RPN tokens
|
|
* \subsection rpn_lang_arg Arguments
|
|
* Expression can be parametric : arguments are given at evaluation and
|
|
* replaced in expression.
|
|
*
|
|
* In RPN expressions arguments are designated by a number (starting from 0)
* and prefixed by the 'A' char.
|
|
*
|
|
* For example an expression evaluating to the sum of its two arguments
* will be written : "A0 A1 +"
|
|
*
|
|
* \subsection rpn_lang_value Constant values
|
|
* Constant values can be expressed in different bases (the Python syntax) :
|
|
* - 42
|
|
* - 0x2a or 0x2A
|
|
* - 0o52
|
|
* - 0b101010
|
|
*
|
|
* \subsection rpn_lang_op Operations
|
|
* Operations have two forms : a short (1 character long) and a long (a string).
|
|
*
|
|
* Each valid operation is declared in the @ref rpn_ops variable (see
* @ref rpn_parse.c for details).
|
|
*
|
|
* The @ref pymod_pyrpn expose a function pyrpn.get_ops() ( @see pyrpn_ops )
|
|
* returning a dict with long operations as key and short as value.
|
|
* \subsubsection rpn_lan_op_internal Internal mechanism
|
|
* Operations are done using a loopstack : operands are popped from the stack, and
* the result is pushed onto it.
|
|
*
|
|
* Operation implementations are written in x86_64 Linux assembly code ( see
* @ref rpn_lib.asm ). Each operation has a corresponding exported symbol
* (declared in @ref rpn_lib.h ) pointing on the compiled code. This code will be
* copied in a memory map in order to compile (@ref rpn_cmap ) an evaluation
* function.
|
|
*/
|
|
/**@page rpn_lang_ext Language extension
|
|
* @brief Howto add new operations
|
|
*
|
|
* @section rpn_lang_ext_op Add an operation
|
|
*
|
|
* In order to add a new operation you have to do three things :
|
|
*
|
|
* @subsection rpn_lang_ext_asm Write the operation code
|
|
*
|
|
* You have to write the operation code in @ref rpn_lib.asm and to expose
|
|
* the corresponding symbols (the code label and the code portion size).
|
|
*
|
|
* The macro part_sz allows to do most of the work by :
|
|
* - defining a symbol NAME_sz pointing on the code portion size
|
|
* - export the symbols NAME and NAME_sz
|
|
*
|
|
* @warning The part_sz macro HAS TO be placed at the end of the corresponding
|
|
* code_portion
|
|
*
|
|
* @subsection rpn_lang_ext_head Import the symbols in C headers
|
|
*
|
|
* The @ref rpn_lib.h header is designed to contain all extern assembly symbols
|
|
* (pointer on compiled code and on size).
|
|
*
|
|
* To add a new operation you have to "import" the symbols defined in
|
|
* @ref rpn_lib.asm using the @ref CODE_PART macro
|
|
*
|
|
* @subsection rpn_lang_ext_code Declare corresponding short and long tokens
|
|
*
|
|
* The @ref rpn_compile will match short or long operations and corresponding
|
|
* pre-compiled code.
|
|
*
|
|
* The association between short (char) long (char*) and pre-compiled code is
|
|
* done in the @ref rpn_ops variable.
|
|
* @note The __op macro allows simple operation declaration.
|
|
*/
|
|
|
|
#endif
|