123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326 |
- /*
- * Copyright (C) 2020 Weber Yann
- *
- * This file is part of pyrpn.
- *
- * pyrpn is free software: you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation, either version 3 of the License, or
- * any later version.
- *
- * pyrpn is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with pyrpn. If not, see <http://www.gnu.org/licenses/>.
- */
- #ifndef __rpn_parse__h__
- #define __rpn_parse__h__
-
- #include <stdlib.h>
- #include <stdio.h>
- #include <string.h>
- #include <errno.h>
-
- #include "config.h"
- #include "rpn_lib.h"
-
- /**@file rpn_parse.h
- * @brief RPN expression parsing headers
- * @ingroup rpn_tokenize
- *
- * Contains headers of @ref rpn_tokenize and @ref rpn_parse .
- */
- /**@defgroup rpn_tokenize Expression tokenization
- * @brief Parsing an expression into a @ref rpn_tokenized_t
- *
- * The tokenized form ( see @ref rpn_tokenized_t ) of an expression is useful
- * for @ref mutation.
- *
- * The tokenizing process is done in a way allowing compilation process to
- * fetch tokens while parsing the expression (see @ref rpn_tok).
- * @ingroup rpn_compile
- */
- /**@defgroup rpn_parse Token parsing functions
- * @brief Internal parsing functions
- * @ingroup rpn_tokenize
- */
-
- /**@brief Shortcut for looping over every index of the operations list
- * @param IDX A modifiable integer lvalue used as loop counter. It is
- * expanded several times, so it must not have side effects.
- * @note rpn_op_sz() is called again at each loop condition test.
- * @note The argument is parenthesized in the expansion so that any lvalue
- * expression can safely be given as counter.
- */
- #define foreach_rpn_ops(IDX) for((IDX)=0; (IDX)<rpn_op_sz(); (IDX)++)
-
- /**@brief Check if a tokenizer is in error state
- * @param tokenizer Pointer on a @ref rpn_tokenizer_s
- * @return zero (false) if no error, else a non-zero char (true)
- * @note Expands to the first chr of @ref rpn_tokenizer_s::err_reason :
- * the tokenizer is in error state when this chr is not '\0'
- */
- #define rpn_tokenizer_error(tokenizer) (*((tokenizer)->err_reason))
-
- /**@brief Shortcut for struct @ref rpn_op_s */
- typedef struct rpn_op_s rpn_op_t;
- /**@brief Shortcut for enum @ref rpn_token_type_e */
- typedef enum rpn_token_type_e rpn_token_type_t;
- /**@brief Shortcut for struct @ref rpn_token_s */
- typedef struct rpn_token_s rpn_token_t;
- /**@brief Shortcut for struct @ref rpn_tokenized_s */
- typedef struct rpn_tokenized_s rpn_tokenized_t;
- /**@brief Shortcut for struct @ref rpn_tokenizer_s */
- typedef struct rpn_tokenizer_s rpn_tokenizer_t;
-
- /**@brief Stores the information identifying an operation
- *
- * Associates the short (chr) and long (str) representations of an operation
- * with pointers on its code and on its code size.
- * @ingroup rpn_tokenize
- */
- struct rpn_op_s
- {
- /**@brief Pointer on function pointer */
- const void **fun;
- /**@brief Pointer on the function code size */
- const unsigned long *fun_sz;
- /**@brief Character representing operation ('\0' if None)*/
- char chr;
- /**@brief String representing operation */
- char *str;
- };
-
- /**@brief Defines the possible types of a @ref rpn_token_s
- *
- * Stored in @ref rpn_token_s::type .
- * @ingroup rpn_tokenize */
- enum rpn_token_type_e {
- /**@brief The token is an operation */
- RPN_op,
- /**@brief The token is an argument */
- RPN_arg,
- /**@brief The token is a value */
- RPN_val
- };
-
- /**@brief Represent an expression token (value, argument or operation)
- *
- * The token data is stored in an anonymous union : its members share the
- * same storage and @ref type tells which one is meaningful.
- * @ingroup rpn_tokenize */
- struct rpn_token_s
- {
- /**@brief Token type */
- rpn_token_type_t type;
-
- /**@brief Token data depending on @ref type */
- union {
- /**@brief Token data for @ref RPN_op tokens */
- struct {
- /**@brief Indicate the operation index in @ref rpn_ops
- * @note NOTE(review): an unsigned char cannot hold an index
- * above UCHAR_MAX while rpn_op_sz() returns a size_t ; confirm
- * the operations list is expected to stay that small */
- unsigned char op_n;
- /**@brief Pointer on operation information */
- const rpn_op_t *op;
- };
- /**@brief Indicate the argument number (presumably for
- * @ref RPN_arg tokens) */
- unsigned long int arg_n;
- /**@brief Indicate the constant value (presumably for
- * @ref RPN_val tokens) */
- unsigned long int value;
- };
- };
- //} __attribute__((aligned));
-
- /**@brief Represent a tokenized expression
- *
- * A list of @ref rpn_token_s with its size, and the number of arguments
- * expected by the expression.
- * @ingroup rpn_tokenize */
- struct rpn_tokenized_s
- {
- /**@brief Number of expected arguments */
- size_t argc;
- /**@brief The number of tokens in the expression */
- size_t tokens_sz;
- /**@brief List of tokens */
- rpn_token_t *tokens;
- };
-
- /**@brief Handles data while tokenizing
- *
- * Stores the tokenization state, allowing to return new tokens as soon as
- * they become ready.
- * @ingroup rpn_tokenize */
- struct rpn_tokenizer_s
- {
- /**@brief Source expression */
- const char *orig;
- /**@brief Expression work buffer */
- char *buff;
- /**@brief Current expression buffer */
- char *cur;
- /**@brief Current chr number (for error generation & debugging) */
- size_t chr_no;
-
- /**@brief The tokenized representation of the expression
- * @note Should point on @ref rpn_expr_t::toks */
- rpn_tokenized_t *toks;
- /**@brief The number of allocated rpn_token_t in toks */
- size_t allocated_toks;
-
- /**@brief Tokenization error message
- * @note An empty string means no error (see rpn_tokenizer_error() ) */
- char err_reason[64];
- };
-
- /**@brief List of all the operations
- *
- * Stores operation identification information. The array is only declared
- * here, its size is not visible from this header : use rpn_op_sz() to get
- * the number of operations.
- * @ingroup rpn_tokenize */
- extern const rpn_op_t rpn_ops[];
-
- /**@brief Initialize a tokenizer and a tokenized representation
- * @param tokenizer Pointer on a new tokenizer
- * @param dst Pointer on a tokenized struct to store generated tokens
- * @param expr Pointer on the RPN expression to tokenize
- * @param argc Number of arguments accepted by the expression
- * @return 0 if no error else -1 and set @ref rpn_tokenizer_s::err_reason
- * @warning No NULL checks are done on the arguments for the moment
- * @ingroup rpn_tokenize
- */
- int rpn_tokenizer_start(rpn_tokenizer_t *tokenizer, rpn_tokenized_t *dst,
- const char* expr, size_t argc);
-
- /**@brief Return the next token
- * @param tokenizer Pointer on tokenizing task information
- * @return A pointer on the next @ref rpn_token_s in
- * @ref rpn_tokenizer_s::toks or NULL if end of expression or error
- * @note When NULL is returned all resources are freed, no need to
- * call @ref rpn_tokenizer_free
- * @note Use rpn_tokenizer_error() to tell an error from the end of the
- * expression when NULL is returned
- * @ingroup rpn_tokenize
- */
- rpn_token_t* rpn_tok(rpn_tokenizer_t *tokenizer);
-
- /**@brief Free resources of a tokenizer
- * @param tokenizer Pointer on the tokenizer we want to deallocate
- * @note This function must be used to abort a tokenizing process when
- * neither an error nor the end of the expression has been encountered
- * @ingroup rpn_tokenize
- */
- void rpn_tokenizer_free(rpn_tokenizer_t *tokenizer);
-
- /**@brief Tokenize a '\0' terminated string
- * @param token A '\0' terminated string
- * @param dst Pointer on information destination
- * @param error Pointer on an error reason buffer of 64 chars (same size
- * as @ref rpn_tokenizer_s::err_reason )
- * @return 0 if dst set and token recognized else -1 and set error buffer
- * @warning The token is expected not to be empty
- * @ingroup rpn_tokenize
- */
- int rpn_tokenize(const char *token, rpn_token_t *dst, char error[64]);
-
- /**@brief Represent a tokenized expression in a string
- * @param tokens Tokenized expression
- * @param long_op If true uses @ref rpn_op_s::str else @ref rpn_op_s::chr
- * @return A newly allocated char* that should be deallocated using free()
- * @ingroup rpn_tokenize
- */
- char* rpn_tokenized_expr(rpn_tokenized_t *tokens, char long_op);
-
- /**@brief Returns NULL or a pointer on corresponding operation information
- * @param token The token we want to match
- * @return NULL or a pointer on the operation information
- * @ingroup rpn_parse
- */
- const rpn_op_t* rpn_match_token(const char* token);
- /**@brief Return -1 or an index corresponding to @ref rpn_ops
- * @param token The token we want to match
- * @return -1 or an index in @ref rpn_ops (presumably -1 when no operation
- * matches the token ; to be confirmed against the implementation)
- * @ingroup rpn_parse
- */
- int rpn_match_token_i(const char* token);
-
- /**@brief Get an integer from a token
- * @param token The token to decode
- * @param result A pointer on the result
- * @return -1 if given token is not a decimal number else 0 is returned
- * and result is set
- * @note NOTE(review): the @ref rpn_lang page also lists hexadecimal, octal
- * and binary constants ; confirm whether "decimal" above is accurate
- * @ingroup rpn_parse
- */
- int rpn_match_number(const char* token, unsigned long *result);
-
- /**@brief Get operations list size
- * @return number of operations in @ref rpn_ops
- * @note Declared with an explicit void parameter list : with empty
- * parentheses the declaration would not be a prototype and calls passing
- * arguments would not be diagnosed by the compiler.
- */
- size_t rpn_op_sz(void);
-
- /**@page rpn_lang RPN expression syntax
- * @brief Howto write an expression
- *
- * \section rpn_lang_syntax General syntax
- * An expression is composed of tokens separated by 1 or multiple separation
- * characters (space, newline or tabs).
- *
- * There are 3 types of token (see @ref rpn_token_type_e ) : @ref rpn_lang_op ,
- * @ref rpn_lang_arg and @ref rpn_lang_value .
- *
- * \section rpn_lang_tokens RPN tokens
- * \subsection rpn_lang_arg Arguments
- * Expression can be parametric : arguments are given at evaluation and
- * replaced in expression.
- *
- * In RPN expressions arguments are designated by a number (starting from 0)
- * and prefixed by the 'A' char.
- *
- * For example an expression evaluating to the sum of its two arguments
- * will be written : "A0 A1 +"
- *
- * \subsection rpn_lang_value Constant values
- * Constant values can be expressed in different bases (the Python syntax) :
- * - 42
- * - 0x2a or 0x2A
- * - 0o52
- * - 0b101010
- *
- * \subsection rpn_lang_op Operations
- * Operations have two forms : a short one (1 character long) and a long one
- * (a string).
- *
- * Every valid operation is declared in the @ref rpn_ops variable (see
- * @ref rpn_parse.c for details).
- *
- * The @ref python_module exposes a function pyrpn.get_ops() ( @see pyrpn_ops )
- * returning a dict with long operations as keys and short ones as values.
- * \subsubsection rpn_lan_op_internal Internal mechanism
- * Operations are done using a loopstack : operands are popped from the stack,
- * and the result is pushed onto it.
- *
- * Operation implementations are written in x86_64 linux assembly code ( see
- * @ref rpn_lib.asm ). Each operation has a corresponding exported symbol
- * (declared in @ref rpn_lib.h ) pointing to the compiled code. This code will
- * be copied in a memory map in order to compile (@ref rpn_cmap ) an evaluation
- * function.
- */
- /**@page rpn_lang_ext Language extension
- * @brief Howto add new operations
- *
- * @section rpn_lang_ext_op Add an operation
- *
- * In order to add a new operation you have to do three things :
- *
- * @subsection rpn_lang_ext_asm Write the operation code
- *
- * You have to write the operation code in @ref rpn_lib.asm and to expose
- * the corresponding symbols (the code label and the code portion size).
- *
- * The macro part_sz does most of the work by :
- * - defining a symbol NAME_sz pointing on the code portion size
- * - exporting the symbols NAME and NAME_sz
- *
- * @warning The part_sz macro HAS TO be placed at the end of the corresponding
- * code_portion
- *
- * @subsection rpn_lang_ext_head Import the symbols in C headers
- *
- * The @ref rpn_lib.h header is designed to contain all extern assembly symbols
- * (pointer on compiled code and on size).
- *
- * To add a new operation you have to "import" the symbols defined in
- * @ref rpn_lib.asm using the @ref CODE_PART macro
- *
- * @subsection rpn_lang_ext_code Declare corresponding short and long tokens
- *
- * The @ref rpn_compile will match short or long operations and corresponding
- * pre-compiled code.
- *
- * The association between short (char), long (char*) and pre-compiled code is
- * done in the @ref rpn_ops variable.
- * @note The __op macro allows simple operation declaration.
- */
-
- #endif
|