Fast IFS using RPN notation
python
c
x86-64
nasm
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

rpn_parse.h 11KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347
  1. /*
  2. * Copyright (C) 2020 Weber Yann
  3. *
  4. * This file is part of pyrpn.
  5. *
  6. * pyrpn is free software: you can redistribute it and/or modify
  7. * it under the terms of the GNU General Public License as published by
  8. * the Free Software Foundation, either version 3 of the License, or
  9. * any later version.
  10. *
  11. * pyrpn is distributed in the hope that it will be useful,
  12. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  14. * GNU General Public License for more details.
  15. *
  16. * You should have received a copy of the GNU General Public License
  17. * along with pyrpn. If not, see <http://www.gnu.org/licenses/>.
  18. */
  19. #ifndef __rpn_parse__h__
  20. #define __rpn_parse__h__
  21. #include <stdlib.h>
  22. #include <stdio.h>
  23. #include <string.h>
  24. #include <errno.h>
  25. #include "config.h"
  26. #include "rpn_lib.h"
  27. /**@file rpn_parse.h
  28. * @brief RPN expression parsing headers
  29. * @ingroup rpn_tokenize
  30. *
  31. * Contains headers of @ref rpn_tokenize and @ref rpn_parse .
  32. */
  33. /**@defgroup rpn_tokenize Expression tokenization
  34. * @brief Parsing an expression into a @ref rpn_tokenized_t
  35. *
  36. * The tokenized form ( see @ref rpn_tokenized_t ) of an expression is useful
  37. * for mutations (see @ref rpn_mutate.h ).
  38. *
  39. * The tokenizing process is done in a way allowing compilation process to
  40. * fetch tokens while parsing the expression (see @ref rpn_tok).
  41. * @ingroup rpn_compile
  42. */
  43. /**@defgroup rpn_parse Token parsing functions
  44. * @brief Internal parsing functions
  45. * @ingroup rpn_tokenize
  46. */
  47. /**@brief Shortcut to loop over every index of the operations list (IDX goes from 0 to RPN_OP_SZ - 1) */
  48. #define foreach_rpn_ops(IDX) for(IDX=0; IDX<RPN_OP_SZ; IDX++)
  49. /**@brief Check if a tokenizer is in error state
  50. * @param tokenizer Pointer on a @ref rpn_tokenizer_s
  51. * @return false if no error else true
  52. * @note Expands to the first chr of @ref rpn_tokenizer_s::err_reason, which is '\0' (false) when the reason string is empty
  53. */
  54. #define rpn_tokenizer_error(tokenizer) (*((tokenizer)->err_reason))
  55. /**@brief Shortcut for struct @ref rpn_op_s */
  56. typedef struct rpn_op_s rpn_op_t;
  57. /**@brief Shortcut for enum @ref rpn_token_type_e */
  58. typedef enum rpn_token_type_e rpn_token_type_t;
  59. /**@brief Shortcut for struct @ref rpn_token_s */
  60. typedef struct rpn_token_s rpn_token_t;
  61. /**@brief Shortcut for struct @ref rpn_tokenized_s */
  62. typedef struct rpn_tokenized_s rpn_tokenized_t;
  63. /**@brief Shortcut for struct @ref rpn_tokenizer_s */
  64. typedef struct rpn_tokenizer_s rpn_tokenizer_t;
  65. /**@brief Stores the identification informations of an operation
  66. * @ingroup rpn_tokenize
  67. */
  68. struct rpn_op_s
  69. {
  70. /**@brief Pointer on the operation function pointer */
  71. const void **fun;
  72. /**@brief Pointer on the function code size */
  73. const unsigned long *fun_sz;
  74. /**@brief Character representing the operation ('\0' if none) */
  75. char chr;
  76. /**@brief String representing the operation */
  77. char *str;
  78. };
  79. /**@brief Enumerates the possible types of a @ref rpn_token_s
  80. * @ingroup rpn_tokenize */
  81. enum rpn_token_type_e {
  82. /**@brief The token is an operation */
  83. RPN_op,
  84. /**@brief The token is an argument */
  85. RPN_arg,
  86. /**@brief The token is a constant value */
  87. RPN_val,
  88. };
  89. /**@brief Represents an expression token (value, argument or operation)
  90. * @ingroup rpn_tokenize */
  91. struct rpn_token_s
  92. {
  93. /**@brief Token type */
  94. rpn_token_type_t type;
  95. /**@brief Token data, the meaningful member depends on @ref type */
  96. union {
  97. /**@brief Token data for @ref RPN_op tokens */
  98. struct {
  99. /**@brief Index of the operation in @ref rpn_ops */
  100. unsigned char op_n;
  101. /**@brief Pointer on operation informations */
  102. const rpn_op_t *op;
  103. };
  104. /**@brief Argument number (presumably for @ref RPN_arg tokens) */
  105. unsigned long int arg_n;
  106. /**@brief Constant value (presumably for @ref RPN_val tokens) */
  107. unsigned long int value;
  108. };
  109. };
  110. //} __attribute__((aligned));
  111. /**@brief Represents a tokenized expression
  112. *
  113. * An array of @ref rpn_token_s with its size, plus the expression argc
  114. * @ingroup rpn_tokenize */
  115. struct rpn_tokenized_s
  116. {
  117. /**@brief Number of expected arguments */
  118. size_t argc;
  119. /**@brief The number of tokens in the expression */
  120. size_t tokens_sz;
  121. /**@brief List of tokens */
  122. rpn_token_t *tokens;
  123. };
  124. /**@brief Handles data while tokenizing
  125. *
  126. * Stores the tokenization state, allowing to return new tokens as soon as they
  127. * become ready.
  128. * @ingroup rpn_tokenize */
  129. struct rpn_tokenizer_s
  130. {
  131. /**@brief Source expression */
  132. const char *orig;
  133. /**@brief Expression work buffer */
  134. char *buff;
  135. /**@brief Current position in the expression buffer */
  136. char *cur;
  137. /**@brief Current chr number (for error generation & debugging) */
  138. size_t chr_no;
  139. /**@brief The tokenized representation of the expression
  140. * @note Should point on @ref rpn_expr_t::toks */
  141. rpn_tokenized_t *toks;
  142. /**@brief The number of rpn_token_t allocated in toks */
  143. size_t allocated_toks;
  144. /**@brief Tokenization error reason (empty string when no error, see @ref rpn_tokenizer_error) */
  145. char err_reason[64];
  146. };
  147. /**@brief Defines all operations
  148. *
  149. * Stores operations identification informations
  150. * @ingroup rpn_tokenize */
  151. extern const rpn_op_t rpn_ops[];
  152. /**@brief The count of operations (the size of @ref rpn_ops array) */
  153. extern const size_t RPN_OP_SZ;
  154. /**@brief Initialize a tokenizer and a tokenized representation
  155. * @param tokenizer Pointer on a new tokenizer
  156. * @param dst Pointer on a tokenized struct to store generated tokens
  157. * @param expr Pointer on the RPN expression to tokenize
  158. * @param argc Number of arguments accepted by the expression
  159. * @return 0 if no error, else -1 with @ref rpn_tokenizer_s::err_reason set
  160. * @warning Arguments are not checked against NULL for the moment...
  161. * @ingroup rpn_tokenize
  162. */
  163. int rpn_tokenizer_start(rpn_tokenizer_t *tokenizer, rpn_tokenized_t *dst,
  164. const char* expr, size_t argc);
  165. /**@brief Return the next token
  166. * @param tokenizer Pointer on tokenizing task informations
  167. * @return A pointer on the next @ref rpn_token_s in @ref rpn_tokenizer_s::toks
  168. * or NULL if end of expression or error
  169. * @note When NULL is returned all resources are freed, no need to
  170. * call @ref rpn_tokenizer_free
  171. * @ingroup rpn_tokenize
  172. */
  173. rpn_token_t* rpn_tok(rpn_tokenizer_t *tokenizer);
  174. /**@brief Free resources of a tokenizer
  175. * @param tokenizer Pointer on the tokenizer we want to deallocate
  176. * @note This function must be used to abort a tokenizing process when
  177. * neither an error nor the end of expression has been encountered
  178. * @ingroup rpn_tokenize
  179. */
  180. void rpn_tokenizer_free(rpn_tokenizer_t *tokenizer);
  181. /**@brief Tokenize a '\0' terminated string
  182. * @param token A '\0' terminated string
  183. * @param dst Pointer on information destination
  184. * @param error Pointer on an error reason buffer (declared as 64 chars)
  185. * @return 0 if dst is set and token recognized, else -1 with error buffer set
  186. * @warning assert token is not empty
  187. * @ingroup rpn_tokenize
  188. */
  189. int rpn_tokenize(const char *token, rpn_token_t *dst, char error[64]);
  190. /**@brief Represents a tokenized expression as a string
  191. * @param tokens Tokenized expression
  192. * @param long_op If true uses @ref rpn_op_s::str else @ref rpn_op_s::chr
  193. * @return A newly allocated char* that the caller should deallocate using free()
  194. * @ingroup rpn_tokenize
  195. */
  196. char* rpn_tokenized_expr(const rpn_tokenized_t *tokens, char long_op);
  197. /**@brief Returns NULL or a pointer on the corresponding operation infos
  198. * @param token The token we want to match
  199. * @return NULL or a pointer on the operation informations
  200. * @ingroup rpn_parse
  201. */
  202. const rpn_op_t* rpn_match_token(const char* token);
  203. /**@brief Returns NULL or a pointer on the corresponding operation infos
  204. * @param opcode The operation code (index in @ref rpn_ops )
  205. * @return NULL or a pointer on the operation informations
  206. */
  207. const rpn_op_t* rpn_op_from_opcode(unsigned char opcode);
  208. /**@brief Return -1 or an index corresponding to @ref rpn_ops
  209. * @param token The token we want to match
  210. * @return -1 or the index of the matching operation in @ref rpn_ops
  211. * @ingroup rpn_parse
  212. */
  213. int rpn_match_token_i(const char* token);
  214. /**@brief Get an integer from a token
  215. * @param token The token to decode
  216. * @param result A pointer on the result
  217. * @return -1 if the given token is not a decimal number, else 0 is returned
  218. * and result is set
  219. * @ingroup rpn_parse
  220. */
  221. int rpn_match_number(const char* token, unsigned long *result);
  222. /**@brief Stores a token string representation in the given buffer
  223. * @param token The token to represent
  224. * @param dst The destination buffer for the string
  225. * @param sz The buffer size
  226. * @return Same as snprintf (the number of chr stored or a negative value on error)
  227. */
  228. int rpn_token_snprintf(rpn_token_t *token, char *dst, size_t sz);
  229. /**@brief Get operations list size (takes no argument)
  230. * @return The number of operations in @ref rpn_ops
  231. */
  232. size_t rpn_op_sz(void);
  233. /**@brief Macro version of @ref rpn_op_sz() ; NOTE(review): relies on sizeof(rpn_ops) but this header only declares rpn_ops as an array of unknown size, presumably usable only where the array definition is visible - TODO confirm */
  234. #define RPN_OPS_SZ (sizeof(rpn_ops)/sizeof(rpn_op_t))
  235. /**@page rpn_lang RPN expression syntax
  236. * @brief Howto write an expression
  237. *
  238. * \section rpn_lang_syntax General syntax
  239. * An expression is composed of tokens separated by 1 or multiple separation
  240. * characters (space, newline or tabs).
  241. *
  242. * There are 3 types of token (see @ref rpn_token_type_e ) : @ref rpn_lang_op ,
  243. * @ref rpn_lang_arg and @ref rpn_lang_value .
  244. *
  245. * \section rpn_lang_tokens RPN tokens
  246. * \subsection rpn_lang_arg Arguments
  247. * Expression can be parametric : arguments are given at evaluation and
  248. * replaced in expression.
  249. *
  250. * In RPN expressions arguments are designated by a number (starting from 0)
  251. * and prefixed by the 'A' char.
  252. *
  253. * For example an expression evaluating to the sum of its two arguments
  254. * will be written : "A0 A1 +"
  255. *
  256. * \subsection rpn_lang_value Constant values
  257. * Constant values can be expressed in different bases (the Python syntax) :
  258. * - 42
  259. * - 0x2a or 0x2A
  260. * - 0o52
  261. * - 0b101010
  262. *
  263. * \subsection rpn_lang_op Operations
  264. * Operations have two forms : a short one (1 character long) and a long one (a string).
  265. *
  266. * Every valid operation is declared in the @ref rpn_ops variable (see
  267. * @ref rpn_parse.c for details).
  268. *
  269. * The @ref pymod_pyrpn exposes a function pyrpn.get_ops() ( @see pyrpn_ops )
  270. * returning a dict with long operations as keys and short ones as values.
  271. * \subsubsection rpn_lan_op_internal Internal mechanism
  272. * Operations are done using a loopstack : operands are popped from the stack, and
  273. * the result is pushed onto it.
  274. *
  275. * Operation implementations are written in x86_64 linux assembly code ( see
  276. * @ref rpn_lib.asm ). Each operation has a corresponding exported symbol
  277. * (declared in @ref rpn_lib.h ) pointing on the compiled code. This code will be
  278. * copied in a memory map in order to compile (@ref rpn_cmap ) an evaluation
  279. * function.
  280. */
  281. /**@page rpn_lang_ext Language extension
  282. * @brief Howto add new operations
  283. *
  284. * @section rpn_lang_ext_op Add an operation
  285. *
  286. * In order to add a new operation you have to do three things :
  287. *
  288. * @subsection rpn_lang_ext_asm Write the operation code
  289. *
  290. * You have to write the operation code in @ref rpn_lib.asm and to expose
  291. * the corresponding symbols (the code label and the code portion size).
  292. *
  293. * The macro part_sz allows to do most of the work by :
  294. * - defining a symbol NAME_sz pointing on the code portion size
  295. * - exporting the symbols NAME and NAME_sz
  296. *
  297. * @warning The part_sz macro HAS TO be placed at the end of the corresponding
  298. * code_portion
  299. *
  300. * @subsection rpn_lang_ext_head Import the symbols in C headers
  301. *
  302. * The @ref rpn_lib.h header is designed to contain all extern assembly symbols
  303. * (pointer on compiled code and on size).
  304. *
  305. * To add a new operation you have to "import" the symbols defined in
  306. * @ref rpn_lib.asm using the @ref CODE_PART macro
  307. *
  308. * @subsection rpn_lang_ext_code Declare corresponding short and long tokens
  309. *
  310. * The @ref rpn_compile will match short or long operations and corresponding
  311. * pre-compiled code.
  312. *
  313. * The association between short (char), long (char*) and pre-compiled code is
  314. * done in the @ref rpn_ops variable.
  315. * @note The __op macro allows simple operation declaration.
  316. */
  317. #endif