Starting implementation of relation in datasource

2025-12-03 17:26:54 +01:00 · 2016-06-09 10:00:15 +02:00 · 2016-06-09 10:00:15 +02:00 · 11027fd959
commit 11027fd959
parent afadd92b3d
3 changed files with 243 additions and 102 deletions
--- a/plugins/mongodb_datasource/main.py
+++ b/plugins/mongodb_datasource/main.py
@ -1,5 +1,7 @@
 # -*- coding: utf-8 -*-

+import re
+import warnings
 import bson
 from bson.son import SON
 from collections import OrderedDict
@ -7,41 +9,56 @@ import pymongo
 from pymongo.errors import BulkWriteError
 import urllib

-from .utils import mongodbconnect, object_collection_name, parse_query_filters, parse_query_order, MONGODB_SORT_OPERATORS_MAP
+from lodel import logger
+
+from .utils import mongodbconnect, object_collection_name, MONGODB_SORT_OPERATORS_MAP

 class MongoDbDataSourceError(Exception):
    pass


 class MongoDbDatasource(object):
+    
+    ##@brief Mapping from lodel2 operators to mongodb operator
+    lodel2mongo_op_map = {
+        '=':'$eq', '<=':'$lte', '>=':'$gte', '!=':'$ne', '<':'$lt',
+        '>':'$gt', 'in':'$in', 'not in':'$nin' }
+    ##@brief List of mongodb operators whose value goes through the
+    # wildcard to regular expression conversion
+    #@note NOTE(review): $in / $nin usually take a list of values, confirm
+    # that these are the operators meant to receive a regular expression
+    mongo_op_re = ['$in', '$nin']
+    ##@brief Matches every '*' that is not escaped by a backslash
+    #
+    # A lookbehind is used so that the character preceding the wildcard is
+    # not consumed by a substitution, and so that a leading '*' matches too
+    wildcard_re = re.compile(r'(?<!\\)\*')

    ## @brief instanciates a database object given a connection name
    # @param connection_name str
    def __init__(self, connection_name):
-        self.database = mongodbconnect(connection_name)
+        self.r_database = mongodbconnect(connection_name)
+        # Every other method of this class still reads self.database :
+        # keep that name bound to the same connection object
+        self.database = self.r_database

-    ## @brief returns a selection of documents from the datasource
-    # @param target Emclass
-    # @param field_list list
-    # @param filters list : List of filters
-    # @param rel_filters list : List of relational filters
-    # @param order list : List of column to order. ex: order = [('title', 'ASC'),]
-    # @param group list : List of tupple representing the column used as "group by" fields. ex: group = [('title', 'ASC'),]
-    # @param limit int : Number of records to be returned
-    # @param offset int: used with limit to choose the start record
-    # @param instanciate bool : If true, the records are returned as instances, else they are returned as dict
-    # @return list
-    # @todo Implement the relations
+    ##@brief returns a selection of documents from the datasource
+    #@param target Emclass
+    #@param field_list list
+    #@param filters list : List of filters
+    #@param rel_filters list : List of relational filters
+    #@param order list : List of column to order. ex: order = [('title', 'ASC'),]
+    #@param group list : List of tupple representing the column used as "group by" fields. ex: group = [('title', 'ASC'),]
+    #@param limit int : Number of records to be returned
+    #@param offset int: used with limit to choose the start record
+    #@param instanciate bool : If true, the records are returned as instances, else they are returned as dict
+    #@return list
+    #@todo Implement the relations
    def select(self, target, field_list, filters, rel_filters=None, order=None, group=None, limit=None, offset=0, instanciate=True):
        collection_name = object_collection_name(target)
        collection = self.database[collection_name]
-        query_filters = parse_query_filters(filters)
-        query_result_ordering = parse_query_order(order) if order is not None else None
+        query_filters = self.__process_filters(
+            target, filters, relational_filters)
+        query_result_ordering = None
+        if order is not None:
+            query_result_ordering = parse_query_order(order)
        results_field_list = None if len(field_list) == 0 else field_list
        limit = limit if limit is not None else 0

        if group is None:
-            cursor = collection.find(filter=query_filters, projection=results_field_list, skip=offset, limit=limit, sort=query_result_ordering)
+            cursor = collection.find(
+                filter=query_filters, projection=results_field_list,
+                skip=offset, limit=limit, sort=query_result_ordering)
        else:
            pipeline = list()
            unwinding_list = list()
@ -59,7 +76,9 @@ class MongoDbDatasource(object):

            pipeline.append({'$match': query_filters})
            if results_field_list is not None:
-                pipeline.append({'$project': SON([{field_name: 1} for field_name in field_list])})
+                pipeline.append({
+                    '$project': SON([{field_name: 1} 
+                    for field_name in field_list])})
            pipeline.extend(unwinding_list)
            pipeline.append({'$group': grouping_dict})
            pipeline.extend({'$sort': SON(sorting_list)})
@ -74,11 +93,12 @@ class MongoDbDatasource(object):

        return results

-    ## @brief Deletes one record defined by its uid
-    # @param target Emclass : class of the record to delete
-    # @param uid dict|list : a dictionary of fields and values composing the unique identifier of the record or a list of several dictionaries
-    # @return int : number of deleted records
-    # @TODO Implement the error management
+    ##@brief Deletes one record defined by its uid
+    #@param target Emclass : class of the record to delete
+    #@param uid dict|list : a dictionary of fields and values composing the 
+    # unique identifier of the record or a list of several dictionaries
+    #@return int : number of deleted records
+    #@TODO Implement the error management
    def delete(self, target, uid):
        if isinstance(uid, dict):
            uid = [uid]
@ -122,3 +142,181 @@ class MongoDbDatasource(object):
        collection = self.database[collection_name]
        result = collection.insert_many(datas_list)
        return len(result.inserted_ids)
+    
+    ##@brief Return a pymongo collection given a LeObject child class
+    #@param leobject LeObject child class (no instance)
+    #@return a pymongo.collection instance
+    def __collection(self, leobject):
+        """Return the collection of self.database that stores leobject.
+
+        The collection name is computed by object_collection_name().
+        """
+        coll_name = object_collection_name(leobject)
+        return self.database[coll_name]
+
+    ##@brief Perform subqueries implied by relational filters and append the
+    # result to existing filters
+    #
+    #The processing is divided in multiple steps :
+    # - determine (for each relational field of the target)  every collection 
+    #that are involved
+    # - generate subqueries for relational_filters that concerns a different 
+    #collection than target collection
+    #filters
+    # - execute subqueries
+    # - transform subqueries results in filters
+    # - merge subqueries generated filters with existing filters
+    #
+    #@param target LeObject subclass (no instance) : Target class
+    #@param filters list : List of tuple(FIELDNAME, OP, VALUE)
+    #@param relational_filters : same composition than filters except that
+    # FIELD is represented by a tuple(FIELDNAME, {CLASS1:RFIELD1, 
+    # CLASS2:RFIELD2})
+    #@return a list of pymongo filters ( dict {FIELD:{OPERATOR:VALUE}} )
+    def __process_filters(self,target, filters, relational_filters):
+        """Build the pymongo filter dict of a query on target.
+
+        Simple filters are converted directly. Relational filters are
+        resolved by running one subquery per referenced collection; the
+        uids found are turned into a '$in' constraint on the relational
+        field.
+
+        @param target LeObject subclass (no instance) : Target class
+        @param filters list : List of tuple(FIELDNAME, OP, VALUE)
+        @param relational_filters list|None : List of
+            tuple((FIELDNAME, {CLASS: RFIELD, ...}), OP, VALUE)
+        @return dict {FIELD: {MONGO_OPERATOR: VALUE}}
+        @throw MongoDbDataSourceError if a filter uses an unknown operator
+        """
+        # Simple filters lodel2 -> pymongo converting. The result has to be
+        # a dict indexed by field name because subquery results are merged
+        # into it below.
+        simple = dict()
+        for fname, op, value in filters or ():
+            simple.setdefault(fname, list()).append((op, value))
+        res = dict()
+        for fname, op_values in simple.items():
+            res[fname] = self.__op_value_listconv(op_values)
+
+        rfilters = self.__prepare_relational_filters(
+            target, relational_filters or [])
+        # Works in place : afterwards rfilters[fname][leobject] is the
+        # pymongo filter dict of the subquery to run on leobject collection
+        self.__subqueries_from_relational_filters(target, rfilters)
+
+        # Executing subqueries, creating filters from result, and injecting
+        # them in original filters of the query
+        if len(rfilters) > 0:
+            logger.debug("Begining subquery execution")
+        log_msg = ("Subquery running on collection {coll} with filters "
+                   "'{filters}'")
+        for fname, subqueries in rfilters.items():
+            field_filter = res.setdefault(fname, dict())
+            subq_results = set()
+            for leobject, sq_filters in subqueries.items():
+                # NOTE(review): mongo_fieldname is not imported in the
+                # visible part of this module, confirm where it comes from
+                uid_fname = mongo_fieldname(leobject._uid)
+                logger.debug(log_msg.format(
+                    coll=object_collection_name(leobject),
+                    filters=sq_filters))
+
+                # projection expects a list (or dict) of field names : a
+                # bare string would be read as a sequence of characters
+                cursor = self.__collection(leobject).find(
+                    filter=sq_filters,
+                    projection=[uid_fname])
+                subq_results |= set(doc[uid_fname] for doc in cursor)
+            #generating new filter from result
+            if '$in' in field_filter:
+                # There is already an IN on this field and both constraints
+                # have to hold : keep the intersection. An empty
+                # intersection stays an empty '$in' so that nothing matches
+                # (removing the operator would drop the constraint).
+                subq_results &= set(field_filter['$in'])
+            field_filter['$in'] = list(subq_results)
+        if len(rfilters) > 0:
+            logger.debug("End of subquery execution")
+        return res
+    
+    ##@brief Generate subqueries from rfilters tree
+    #
+    #Returned struct organization :
+    # - 1st level keys : relational field name of target
+    # - 2nd level keys : referenced leobject
+    # - 3rd level values : pymongo filters (dict)
+    #
+    #@note The only caller of this method is __process_filters
+    #@warning No return value, the rfilters argument is modified by
+    #reference
+    #
+    #@param target LeObject subclass (no instance) : Target class
+    #@param rfilters dict : A struct as returned by 
+    #MongoDbDatasource.__prepare_relational_filters()
+    #@return None, the rfilters argument is modified by reference
+    def __subqueries_from_relational_filters(self, target, rfilters):
+        """Replace, in place, every (op, value) list of rfilters by the
+        pymongo filter dict built by __op_value_listconv().
+
+        @param target LeObject subclass (no instance) : not used here
+        @param rfilters dict : {fname: {leobject: {rfield: [(op, value)]}}}
+        @return None, rfilters is modified by reference
+        """
+        for by_leobject in rfilters.values():
+            for by_rfield in by_leobject.values():
+                # Converting field by field is not optimized, but it lets
+                # the conversion warn when a same operator is given twice
+                # for a same field of a same collection
+                for rfield in by_rfield:
+                    op_values = by_rfield[rfield]
+                    by_rfield[rfield] = self.__op_value_listconv(op_values)
+    
+    ##@brief Generate a tree from relational_filters
+    #
+    #The generated struct is a dict with :
+    # - 1st level keys : relational field name of target
+    # - 2nd level keys : referenced leobject
+    # - 3rd level keys : referenced field in referenced class
+    # - 4th level values : list of tuple(op, value)
+    #
+    #@note The only caller of this method is __process_filters
+    #@warning An assertion is done : if two leobject are stored in the same
+    #collection they share the same uid
+    #
+    #@param target LeObject subclass (no instance) : Target class
+    #@param relational_filters : same composition than filters except that
+    # FIELD is represented by a tuple(FIELDNAME, {CLASS1:RFIELD1,
+    # CLASS2:RFIELD2})
+    #@return a struct as described above
+    def __prepare_relational_filters(self, target, relational_filters):
+        """Group relational filters by relational field, then by collection.
+
+        Returned struct :
+         - 1st level keys : relational field name of target
+         - 2nd level keys : leobject representing a referenced collection
+         - 3rd level keys : referenced field in the referenced class
+         - 4th level values : list of tuple(op, value)
+
+        @param target LeObject subclass (no instance) : not used here
+        @param relational_filters list|None : list of
+            tuple((FIELDNAME, {CLASS: RFIELD, ...}), OP, VALUE)
+        @return dict as described above (empty when there is no filter)
+        """
+        rfilters = dict()
+        # For each relational field, the representative leobject of every
+        # collection name met so far. It is kept across filters so that
+        # several filters on a same field accumulate under the same keys
+        # instead of overwriting each other.
+        representatives = dict()
+        for (fname, rfields), op, value in relational_filters or ():
+            by_leobject = rfilters.setdefault(fname, dict())
+            leo_collname = representatives.setdefault(fname, dict())
+            # WARNING ! Here we assume that all leobject that are stored
+            # in a same collection are identified by the same field
+            for leobject, rfield in rfields.items():
+                # The dict is indexed by leobject, but only one leobject
+                # per collection name is used as key
+                cur_collname = object_collection_name(leobject)
+                if cur_collname not in leo_collname:
+                    leo_collname[cur_collname] = leobject
+                    by_leobject[leobject] = dict()
+                #Fetching the collection's representative leobject
+                repr_leo = leo_collname[cur_collname]
+                by_leobject[repr_leo].setdefault(rfield, list()).append(
+                    (op, value))
+        return rfilters
+    
+    ##@brief Convert lodel2 operator and value to pymongo struct
+    #
+    #Conversion is done using MongoDbDatasource::lodel2mongo_op_map
+    #@param op str : take value in LeFilteredQuery::_query_operators
+    #@param value mixed : the value
+    #@return a tuple(mongo_op, mongo_value)
+    def __op_value_conv(self, op, value):
+        """Convert a lodel2 (operator, value) couple to its pymongo form.
+
+        The operator is translated with lodel2mongo_op_map. When the
+        resulting operator is listed in mongo_op_re and the value is a
+        string, the value is read as a lodel2 wildcarded string and turned
+        into a case insensitive '$regex' dict.
+
+        @param op str : a key of lodel2mongo_op_map
+        @param value mixed : the value
+        @return a tuple(mongo_op, mongo_value)
+        @throw MongoDbDataSourceError if op is not a known operator
+        """
+        if op not in self.lodel2mongo_op_map:
+            msg = "Invalid operator '%s' found" % op
+            raise MongoDbDataSourceError(msg)
+        mongop = self.lodel2mongo_op_map[op]
+        mongoval = value
+        #Converting lodel2 wildcarded string into a case insensitive
+        #mongodb re. Only strings can hold wildcards : any other value
+        #(a list of values for example) is passed through unchanged
+        if mongop in self.mongo_op_re and isinstance(value, str):
+            #unescaping backslashes
+            mongoval = value.replace('\\\\','\\')
+            if not mongoval.startswith('*'):
+                mongoval = '^'+mongoval
+            #Anchor the end unless the string ends with an unescaped
+            #wildcard. endswith() is safe on one character strings where
+            #indexing [-2] is not
+            if not mongoval.endswith('*') or mongoval.endswith('\\*'):
+                mongoval += '$'
+            #Replacing every other unescaped wildcard char
+            mongoval = self.wildcard_re.sub('.*', mongoval)
+            mongoval = {'$regex': mongoval, '$options': 'i'}
+        return (mongop, mongoval)
+    
+    ##@brief Convert a list of tuple(OP, VALUE) into a pymongo filter dict
+    #@return a dict with mongo op as key and value as value...
+    def __op_value_listconv(self, op_value_list):
+        """Fold a list of tuple(OP, VALUE) into one pymongo filter dict.
+
+        Each couple is converted by __op_value_conv(). When the same
+        converted operator shows up again, a warning is emitted and the
+        first value is kept.
+
+        @param op_value_list list : list of tuple(OP, VALUE)
+        @return dict with converted operators as keys
+        """
+        converted = dict()
+        for lodel_op, lodel_value in op_value_list:
+            key, val = self.__op_value_conv(lodel_op, lodel_value)
+            if key not in converted:
+                converted[key] = val
+                continue
+            warnings.warn(
+                "Duplicated value given for a single "
+                "field/operator couple in a query. We will keep only the "
+                "first one")
+        return converted
+