/*
 * Copyright (C) 2020 Weber Yann
 *
 * This file is part of pyrpn.
 *
 * pyrpn is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * any later version.
 *
 * pyrpn is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with pyrpn.  If not, see https://www.gnu.org/licenses/ .
 */
/* NOTE(review): identifiers beginning with two underscores are reserved for
 * the implementation; consider renaming this include guard. */
#ifndef __rpn_parse__h__
#define __rpn_parse__h__

/* NOTE(review): the three directives below carry no header name; the
 * angle-bracket targets appear to have been lost. Restore them from the
 * upstream file (size_t is used further down, so at least a header
 * providing it is presumably needed) -- TODO confirm. */
#include
#include
#include

#include "config.h"
#include "rpn_lib.h"

/**@file rpn_parse.h
 * @brief RPN expression parsing headers
 * @ingroup rpn_tokenize
 *
 * Contains headers of @ref rpn_tokenize and @ref rpn_parse .
 */

/**@defgroup rpn_tokenize Expression tokenization
 * @brief Parsing an expression into a @ref rpn_tokenized_t
 *
 * The tokenized form ( see @ref rpn_tokenized_t ) of an expression is useful
 * for @ref mutation.
 *
 * The tokenizing process is done in a way allowing compilation process to
 * fetch tokens while parsing the expression (see @ref rpn_tok).
* @ingroup rpn_compile
 */

/**@defgroup rpn_parse Token parsing functions
 * @brief Internal parsing functions
 * @ingroup rpn_tokenize
 */

/**@brief Shortcut for looping over the whole operations list
 * @note NOTE(review): this definition looks damaged. Its parentheses are
 * unbalanced and the loop condition is incomplete: some text seems to be
 * missing between "IDX" and "err_reason". Restore it from the upstream
 * file before relying on it. */
#define foreach_rpn_ops(IDX) for(IDX=0; IDXerr_reason))

/**@brief Shortcut for struct @ref rpn_op_s */
typedef struct rpn_op_s rpn_op_t;
/**@brief Shortcut for enum @ref rpn_token_type_e */
typedef enum rpn_token_type_e rpn_token_type_t;
/**@brief Shortcut for struct @ref rpn_token_s */
typedef struct rpn_token_s rpn_token_t;
/**@brief Shortcut for struct @ref rpn_tokenized_s */
typedef struct rpn_tokenized_s rpn_tokenized_t;
/**@brief Shortcut for struct @ref rpn_tokenizer_s */
typedef struct rpn_tokenizer_s rpn_tokenizer_t;

/**@brief Stores the information identifying an operation
 * @ingroup rpn_tokenize */
struct rpn_op_s
{
	/**@brief Pointer on function pointer */
	const void **fun;
	/**@brief Pointer on the function code size */
	const unsigned long *fun_sz;
	/**@brief Character representing the operation ('\0' if none) */
	char chr;
	/**@brief String representing the operation */
	char *str;
};

/**@brief Defines @ref rpn_token_s types
 * @ingroup rpn_tokenize */
enum rpn_token_type_e {
	/**@brief The token is an operation */
	RPN_op,
	/**@brief The token is an argument */
	RPN_arg,
	/**@brief The token is a value */
	RPN_val
};

/**@brief Represent an expression token (value, argument or operation)
 * @ingroup rpn_tokenize */
struct rpn_token_s
{
	/**@brief Token type */
	rpn_token_type_t type;
	/**@brief Token data depending on @ref type */
	union {
		/**@brief Token data for @ref RPN_op tokens */
		struct {
			/**@brief Indicate the operation index in @ref rpn_ops */
			unsigned char op_n;
			/**@brief Pointer on operation information */
			const rpn_op_t *op;
		};
		/**@brief Indicate the argument number */
		unsigned long int arg_n;
		/**@brief Indicate the constant value */
		unsigned long int value;
	};
};
//} __attribute__((aligned));

/**@brief Represent a tokenized expression
 *
 * A list of @ref rpn_token_s and argc
 * @ingroup
rpn_tokenize */
struct rpn_tokenized_s
{
	/**@brief Number of expected arguments */
	size_t argc;
	/**@brief The number of tokens in the expression */
	size_t tokens_sz;
	/**@brief List of tokens */
	rpn_token_t *tokens;
};

/**@brief Handles data while tokenizing
 *
 * Stores the compilation state, allowing new tokens to be returned as soon
 * as they become ready.
 * @ingroup rpn_tokenize */
struct rpn_tokenizer_s
{
	/**@brief Source expression */
	const char *orig;
	/**@brief Expression work buffer */
	char *buff;
	/**@brief Current expression buffer */
	char *cur;
	/**@brief Current chr number (for error generation & debugging) */
	size_t chr_no;

	/**@brief The tokenized representation of the expression
	 * @note Should point on @ref rpn_expr_t::toks */
	rpn_tokenized_t *toks;

	/**@brief The number of allocated rpn_token_t in toks */
	size_t allocated_toks;

	/**@brief Tokenization error */
	char err_reason[64];
};

/**@brief Define all operations
 *
 * Stores operation identification information
 * @ingroup rpn_tokenize */
extern const rpn_op_t rpn_ops[];

/**@brief Initialize a tokenizer and a tokenized representation
 * @param tokenizer Pointer on a new tokenizer
 * @param dst Pointer on a tokenized struct to store generated tokens
 * @param expr Pointer on the RPN expression to tokenize
 * @param argc Number of arguments accepted by the expression
 * @return 0 if no error else -1 and set @ref rpn_tokenizer_s::err_reason
 * @warning no NULL checks for the moment...
* @ingroup rpn_tokenize
 */
int rpn_tokenizer_start(rpn_tokenizer_t *tokenizer, rpn_tokenized_t *dst,
		const char* expr, size_t argc);

/**@brief Return the next token
 * @param tokenizer Pointer on tokenizing task information
 * @return A pointer on the next @ref rpn_token_s in
 * @ref rpn_tokenizer_s::toks or NULL if end of expression or error
 * @note When NULL is returned all resources are freed, no need to
 * call @ref rpn_tokenizer_free
 * @ingroup rpn_tokenize */
rpn_token_t* rpn_tok(rpn_tokenizer_t *tokenizer);

/**@brief Free resources of a tokenizer
 * @param tokenizer Pointer on the tokenizer we want to deallocate
 * @note This function must be used to abort a tokenizing process when
 * neither an error nor the end of expression has been encountered
 * @ingroup rpn_tokenize */
void rpn_tokenizer_free(rpn_tokenizer_t *tokenizer);

/**@brief Tokenize a '\0' terminated string
 * @param token A '\0' terminated string
 * @param dst Pointer on information destination
 * @param error Pointer on an error reason buffer
 * @return 0 if dst set and token recognized else -1 and set error buffer
 * @warning assert token is not empty
 * @ingroup rpn_tokenize */
int rpn_tokenize(const char *token, rpn_token_t *dst, char error[64]);

/**@brief Represent a tokenized expression as a string
 * @param tokens Tokenized expression
 * @param long_op If true uses @ref rpn_op_s::str else @ref rpn_op_s::chr
 * @return A newly allocated char* that should be deallocated using free()
 * @ingroup rpn_tokenize */
char* rpn_tokenized_expr(rpn_tokenized_t *tokens, char long_op);

/**@brief Returns NULL or a pointer on corresponding operation information
 * @param token The token we want to match
 * @return NULL or operation information
 * @ingroup rpn_parse */
const rpn_op_t* rpn_match_token(const char* token);

/**@brief Return -1 or an index corresponding to @ref rpn_ops
 * @param token The token we want to match
 * @return -1 or an index in @ref rpn_ops
 * @ingroup rpn_parse */
int rpn_match_token_i(const char* token);

/**@brief Get an
integer from a token * @param token The token to decode * @param result A pointer on the result * @return -1 if given token is not a decimal number else 0 is returned * and result is set * @ingroup rpn_parse */ int rpn_match_number(const char* token, unsigned long *result); /**@brief Get operations list size * @return number of operations in @ref rpn_ops */ size_t rpn_op_sz(); /**@page rpn_lang RPN expression syntax * @brief Howto write an expression * * \section rpn_lang_syntax General syntax * An expression is composed of tokens separated by 1 or multiple separation * characters (space, newline or tabs). * * There is 3 types of token (see @ref rpn_token_type_e ) : @ref rpn_lang_op , * @ref rpn_lang_arg and @ref rpn_lang_value . * * \section rpn_lang_tokens RPN tokens * \subsection rpn_lang_arg Arguments * Expression can be parametric : arguments are given at evaluation and * replaced in expression. * * In RPN epxressions arguments are desgined by a number (starting from 0) * and preffixed by 'A' char. * * For example an expression evaluating to the sum of their two arguments * will be written : "A0 A1 +" * * \subsection rpn_lang_value Constant values * Constant values can be expressed in different bases (the Python syntax) : * - 42 * - 0x2a or 0x2A * - 0o52 * - 0b101010 * * \subsection rpn_lang_op Operations * Operations have two form : a short (1 character long) and a long (a string). * * Each valid operations are declared in @ref rpn_ops variable (see * @ref rpn_parse.c for details). * * The @ref python_module expose a function pyrpn.get_ops() ( @see pyrpn_ops ) * returning a dict with long operations as key and short as value. * \subsubsection rpn_lan_op_internal Internal mechanism * Operations are done using a loopstack : operands are poped from stack, and * the result is pushed onto it. * * Operations implementation are wrote in x86_64 linux assembly code ( see * @ref rpn_lib.asm ). 
Each operations has a corresponding exported symbol
 * (declared in @ref rpn_lib.h ) pointing compiled code. This code will be
 * copied in a memory map in order to compile (@ref rpn_cmap ) an evaluation
 * function.
 */

/**@page rpn_lang_ext Language extension
 * @brief How to add new operations
 *
 * @section rpn_lang_ext_op Add an operation
 *
 * In order to add a new operation you have to do three things :
 *
 * @subsection rpn_lang_ext_asm Write the operation code
 *
 * You have to write the operation code in @ref rpn_lib.asm and to expose
 * the corresponding symbols (the code label and the code portion size).
 *
 * The macro part_sz does most of the work by :
 *  - defining a symbol NAME_sz pointing on the code portion size
 *  - exporting the symbols NAME and NAME_sz
 *
 * @warning The part_sz macro HAS TO be placed at the end of the corresponding
 * code portion
 *
 * @subsection rpn_lang_ext_head Import the symbols in C headers
 *
 * The @ref rpn_lib.h header is designed to contain all extern assembly symbols
 * (pointer on compiled code and on size).
 *
 * To add a new operation you have to "import" the symbols defined in
 * @ref rpn_lib.asm using the @ref CODE_PART macro
 *
 * @subsection rpn_lang_ext_code Declare corresponding short and long tokens
 *
 * The @ref rpn_compile will match short or long operations and corresponding
 * pre-compiled code.
 *
 * The association between short (char), long (char*) and pre-compiled code is
 * done in the @ref rpn_ops variable.
 * @note The __op macro allows simple operation declaration.
 */

#endif /* __rpn_parse__h__ */