Fast IFS using RPN notation
python
c
x86-64
nasm
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

rpn_parse.h 11KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326
  1. /*
  2. * Copyright (C) 2020 Weber Yann
  3. *
  4. * This file is part of pyrpn.
  5. *
  6. * pyrpn is free software: you can redistribute it and/or modify
  7. * it under the terms of the GNU General Public License as published by
  8. * the Free Software Foundation, either version 3 of the License, or
  9. * any later version.
  10. *
  11. * pyrpn is distributed in the hope that it will be useful,
  12. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  14. * GNU General Public License for more details.
  15. *
  16. * You should have received a copy of the GNU General Public License
  17. * along with pyrpn. If not, see <http://www.gnu.org/licenses/>.
  18. */
  19. #ifndef __rpn_parse__h__
  20. #define __rpn_parse__h__
  21. #include <stdlib.h>
  22. #include <stdio.h>
  23. #include <string.h>
  24. #include <errno.h>
  25. #include "config.h"
  26. #include "rpn_lib.h"
  27. /**@file rpn_parse.h
  28. * @brief RPN expression parsing headers
  29. * @ingroup rpn_tokenize
  30. *
  31. * Contains headers of @ref rpn_tokenize and @ref rpn_parse .
  32. */
  33. /**@defgroup rpn_tokenize Expression tokenization
  34. * @brief Parsing an expression into a @ref rpn_tokenized_t
  35. *
  36. * The tokenized form ( see @ref rpn_tokenized_t ) of an expression is useful
  37. * for @ref mutation.
  38. *
  39. * The tokenizing process is done in a way allowing compilation process to
  40. * fetch tokens while parsing the expression (see @ref rpn_tok).
  41. * @ingroup rpn_compile
  42. */
  43. /**@defgroup rpn_parse Token parsing functions
  44. * @brief Internal parsing functions
  45. * @ingroup rpn_tokenize
  46. */
  /**@brief Shortcut for looping over every index of the operations list
   * @param IDX A modifiable integer lvalue used as loop counter : it is set
   * to 0 and incremented while it is lower than rpn_op_sz()
   * @note IDX is parenthesized at each use, so lvalue expressions such as
   * *ptr expand as expected ( (*ptr)++ and not *ptr++ )
   * @note rpn_op_sz() is called again at each iteration test
   */
  #define foreach_rpn_ops(IDX) for((IDX)=0; (IDX)<rpn_op_sz(); (IDX)++)
  /**@brief Tell whether a tokenizer is in error state
   * @param tokenizer Pointer on a @ref rpn_tokenizer_s
   * @return false (zero) if no error, else true (non-zero)
   * @note Evaluates to the first char of @ref rpn_tokenizer_s::err_reason,
   * which is '\0' when there is no error
   */
  #define rpn_tokenizer_error(tokenizer) ((tokenizer)->err_reason[0])
  /**@brief Defines @ref rpn_token_s types
   * @note Defined before the typedefs below : ISO C does not allow
   * "enum tag" to be referenced before the enum is defined
   * @ingroup rpn_tokenize */
  enum rpn_token_type_e {
      /**@brief The token is an operation */
      RPN_op,
      /**@brief The token is an argument */
      RPN_arg,
      /**@brief The token is a value */
      RPN_val
  };

  /**@brief Shortcut for struct @ref rpn_op_s */
  typedef struct rpn_op_s rpn_op_t;
  /**@brief Shortcut for enum @ref rpn_token_type_e */
  typedef enum rpn_token_type_e rpn_token_type_t;
  /**@brief Shortcut for struct @ref rpn_token_s */
  typedef struct rpn_token_s rpn_token_t;
  /**@brief Shortcut for struct @ref rpn_tokenized_s */
  typedef struct rpn_tokenized_s rpn_tokenized_t;
  /**@brief Shortcut for struct @ref rpn_tokenizer_s */
  typedef struct rpn_tokenizer_s rpn_tokenizer_t;

  /**@brief Handles operation identification informations storage
   * @ingroup rpn_tokenize
   */
  struct rpn_op_s
  {
      /**@brief Pointer on function pointer */
      const void **fun;
      /**@brief Function code size */
      const unsigned long *fun_sz;
      /**@brief Character representing operation ('\0' if None)*/
      char chr;
      /**@brief String representing operation */
      char *str;
  };
  /**@brief Represent an expression token (value, argument or operation)
   *
   * The @ref type field tells which member of the anonymous union holds the
   * token data.
   * @ingroup rpn_tokenize */
  struct rpn_token_s
  {
      /**@brief Token type */
      rpn_token_type_t type;
      /**@brief Token data depending on @ref type */
      union {
          /**@brief Token data for @ref RPN_op tokens */
          struct {
              /**@brief Indicate the operation index in @ref rpn_ops */
              unsigned char op_n;
              /**@brief Pointer on operation informations */
              const rpn_op_t *op;
          };
          /**@brief Indicate the argument number (for @ref RPN_arg tokens) */
          unsigned long int arg_n;
          /**@brief Indicate the constant value (for @ref RPN_val tokens) */
          unsigned long int value;
      };
  };
  110. //} __attribute__((aligned));
  /**@brief Represent a tokenized expression
   *
   * A list of @ref rpn_token_s along with the expression argument count
   * @ingroup rpn_tokenize */
  struct rpn_tokenized_s
  {
      /**@brief Number of expected arguments */
      size_t argc;
      /**@brief The number of tokens in the expression */
      size_t tokens_sz;
      /**@brief List of tokens */
      rpn_token_t *tokens;
  };
  /**@brief Handles data while tokenizing
   *
   * Stores the tokenization state, allowing to return new tokens as soon as
   * they become ready.
   * @ingroup rpn_tokenize */
  struct rpn_tokenizer_s
  {
      /**@brief Source expression */
      const char *orig;
      /**@brief Expression work buffer */
      char *buff;
      /**@brief Current expression buffer */
      char *cur;
      /**@brief Current chr number (for error generation & debugging) */
      size_t chr_no;
      /**@brief The tokenized representation of the expression
       * @note Should point on @ref rpn_expr_t::toks */
      rpn_tokenized_t *toks;
      /**@brief The number of allocated rpn_token_t in toks */
      size_t allocated_toks;
      /**@brief Tokenization error message ; its first char is '\0' when
       * there is no error (see @ref rpn_tokenizer_error ) */
      char err_reason[64];
  };
  /**@brief Declares the list of all operations
   *
   * Stores operation identification informations. The number of elements
   * is given by @ref rpn_op_sz() .
   * @ingroup rpn_tokenize */
  extern const rpn_op_t rpn_ops[];
  /**@brief Initialize a tokenizer and a tokenized representation
   * @param tokenizer Pointer on a new tokenizer
   * @param dst Pointer on a tokenized struct to store generated tokens
   * @param expr Pointer on the RPN expression to tokenize
   * @param argc Number of arguments accepted by the expression
   * @return 0 if no error else -1 and set @ref rpn_tokenizer_s::err_reason
   * @warning No NULL checks are done on the arguments for the moment
   * @ingroup rpn_tokenize
   */
  int rpn_tokenizer_start(rpn_tokenizer_t *tokenizer, rpn_tokenized_t *dst,
          const char* expr, size_t argc);
  /**@brief Return the next token
   * @param tokenizer Pointer on tokenizing task informations
   * @return A pointer on the next @ref rpn_token_s in
   * @ref rpn_tokenizer_s::toks or NULL if end of expression or error
   * @note When NULL is returned all resources are freed, no need to
   * call @ref rpn_tokenizer_free
   * @ingroup rpn_tokenize
   */
  rpn_token_t* rpn_tok(rpn_tokenizer_t *tokenizer);
  /**@brief Free resources of a tokenizer
   * @param tokenizer Pointer on the tokenizer we want to deallocate
   * @note This function must be used to abort a tokenizing process when
   * neither an error nor the end of expression has been encountered
   * @ingroup rpn_tokenize
   */
  void rpn_tokenizer_free(rpn_tokenizer_t *tokenizer);
  /**@brief Tokenize a '\0' terminated string
   * @param token A '\0' terminated string
   * @param dst Pointer on information destination
   * @param error Pointer on an error reason buffer (64 chars)
   * @return 0 if dst is set and the token is recognized, else -1 and the
   * error buffer is set
   * @warning Assert token is not empty
   * @ingroup rpn_tokenize
   */
  int rpn_tokenize(const char *token, rpn_token_t *dst, char error[64]);
  /**@brief Represent a tokenized expression as a string
   * @param tokens Tokenized expression
   * @param long_op If true uses @ref rpn_op_s::str else @ref rpn_op_s::chr
   * @return A newly allocated char* that should be deallocated using free()
   * @ingroup rpn_tokenize
   */
  char* rpn_tokenized_expr(rpn_tokenized_t *tokens, char long_op);
  /**@brief Return NULL or a pointer on the corresponding operation infos
   * @param token The token we want to match
   * @return NULL or a pointer on the operation informations
   * @ingroup rpn_parse
   */
  const rpn_op_t* rpn_match_token(const char* token);
  /**@brief Return -1 or an index corresponding to @ref rpn_ops
   * @param token The token we want to match
   * @return -1 or the index in @ref rpn_ops of the operation corresponding
   * to the token
   * @ingroup rpn_parse
   */
  int rpn_match_token_i(const char* token);
  /**@brief Get an integer from a token
   * @param token The token to decode
   * @param result A pointer on the result
   * @return -1 if the given token is not a decimal number, else 0 is
   * returned and result is set
   * @ingroup rpn_parse
   */
  int rpn_match_number(const char* token, unsigned long *result);
  /**@brief Get operations list size
   * @return number of operations in @ref rpn_ops
   * @note Declared with an explicit (void) parameter list : an empty list
   * would not be a prototype and would let calls with arguments compile
   */
  size_t rpn_op_sz(void);
  219. /**@page rpn_lang RPN expression syntax
  220. * @brief Howto write an expression
  221. *
  222. * \section rpn_lang_syntax General syntax
  223. * An expression is composed of tokens separated by 1 or multiple separation
  224. * characters (space, newline or tabs).
  225. *
  226. * There are 3 types of token (see @ref rpn_token_type_e ) : @ref rpn_lang_op ,
  227. * @ref rpn_lang_arg and @ref rpn_lang_value .
  228. *
  229. * \section rpn_lang_tokens RPN tokens
  230. * \subsection rpn_lang_arg Arguments
  231. * Expression can be parametric : arguments are given at evaluation and
  232. * replaced in expression.
  233. *
  234. * In RPN expressions arguments are designated by a number (starting from 0)
  235. * and prefixed by the 'A' char.
  236. *
  237. * For example an expression evaluating to the sum of their two arguments
  238. * will be written : "A0 A1 +"
  239. *
  240. * \subsection rpn_lang_value Constant values
  241. * Constant values can be expressed in different bases (the Python syntax) :
  242. * - 42
  243. * - 0x2a or 0x2A
  244. * - 0o52
  245. * - 0b101010
  246. *
  247. * \subsection rpn_lang_op Operations
  248. * Operations have two forms : a short one (1 character long) and a long one (a string).
  249. *
  250. * Every valid operation is declared in the @ref rpn_ops variable (see
  251. * @ref rpn_parse.c for details).
  252. *
  253. * The @ref python_module exposes a function pyrpn.get_ops() ( @see pyrpn_ops )
  254. * returning a dict with long operations as keys and short ones as values.
  255. * \subsubsection rpn_lan_op_internal Internal mechanism
  256. * Operations are done using a loopstack : operands are popped from the stack, and
  257. * the result is pushed onto it.
  258. *
  259. * Operation implementations are written in x86_64 linux assembly code ( see
  260. * @ref rpn_lib.asm ). Each operation has a corresponding exported symbol
  261. * (declared in @ref rpn_lib.h ) pointing to the compiled code. This code will be
  262. * copied in a memory map in order to compile (@ref rpn_cmap ) an evaluation
  263. * function.
  264. */
  265. /**@page rpn_lang_ext Language extension
  266. * @brief Howto add new operations
  267. *
  268. * @section rpn_lang_ext_op Add an operation
  269. *
  270. * In order to add a new operation you have to do three things :
  271. *
  272. * @subsection rpn_lang_ext_asm Write the operation code
  273. *
  274. * You have to write the operation code in @ref rpn_lib.asm and to expose
  275. * the corresponding symbols (the code label and the code portion size).
  276. *
  277. * The macro part_sz allows to do most of the work by :
  278. * - defining a symbol NAME_sz pointing on the code portion size
  279. * - export the symbols NAME and NAME_sz
  280. *
  281. * @warning The part_sz macro HAS TO be placed at the end of the corresponding
  282. * code_portion
  283. *
  284. * @subsection rpn_lang_ext_head Import the symbols in C headers
  285. *
  286. * The @ref rpn_lib.h header is designed to contain all extern assembly symbols
  287. * (pointer on compiled code and on size).
  288. *
  289. * To add a new operation you have to "import" the symbols defined in
  290. * @ref rpn_lib.asm using the @ref CODE_PART macro
  291. *
  292. * @subsection rpn_lang_ext_code Declare corresponding short and long tokens
  293. *
  294. * The @ref rpn_compile will match short or long operations and corresponding
  295. * pre-compiled code.
  296. *
  297. * The association between short (char), long (char*) and pre-compiled code is
  298. * done in the @ref rpn_ops variable.
  299. * @note The __op macro allows simple operation declaration.
  300. */
  301. #endif