1
0
Fork 0
mirror of https://github.com/yweber/lodel2.git synced 2025-12-03 17:26:54 +01:00

Starting implementation of relation in datasource

This commit is contained in:
Yann 2016-06-09 10:00:15 +02:00
commit 11027fd959
3 changed files with 243 additions and 102 deletions

View file

@ -1,5 +1,7 @@
# -*- coding: utf-8 -*-
import re
import warnings
import bson
from bson.son import SON
from collections import OrderedDict
@ -7,41 +9,56 @@ import pymongo
from pymongo.errors import BulkWriteError
import urllib
from .utils import mongodbconnect, object_collection_name, parse_query_filters, parse_query_order, MONGODB_SORT_OPERATORS_MAP
from lodel import logger
from .utils import mongodbconnect, object_collection_name, MONGODB_SORT_OPERATORS_MAP
class MongoDbDataSourceError(Exception):
    """Error raised by the MongoDB datasource, e.g. on an invalid operator."""
class MongoDbDatasource(object):
##@brief Mapping from lodel2 operators to mongodb operators
lodel2mongo_op_map = {
    '=': '$eq', '<=': '$lte', '>=': '$gte', '!=': '$ne', '<': '$lt',
    '>': '$gt', 'in': '$in', 'not in': '$nin'}
##@brief List of mongodb operators that expect re as value
mongo_op_re = ['$in', '$nin']
##@brief Matches every '*' wildcard that is not escaped by a backslash
#
#A negative lookbehind is used so that the character preceding the star is
#not part of the match (and therefore not lost on substitution) and so that
#a star at the very beginning of a string is matched too.
wildcard_re = re.compile(r'(?<!\\)\*')
## @brief instanciates a database object given a connection name
# @param connection_name str
def __init__(self, connection_name):
# Handle returned by mongodbconnect() for connection_name; the other
# methods of this class index it by collection name.
self.database = mongodbconnect(connection_name)
# Second handle obtained with the very same connection name.
# NOTE(review): this listing looks like a flattened diff, so this line may
# be the replacement of the previous one rather than an addition - confirm
# against the real file before relying on both attributes.
self.r_database = mongodbconnect(connection_name)
## @brief returns a selection of documents from the datasource
# @param target Emclass
# @param field_list list
# @param filters list : List of filters
# @param rel_filters list : List of relational filters
# @param order list : List of column to order. ex: order = [('title', 'ASC'),]
# @param group list : List of tupple representing the column used as "group by" fields. ex: group = [('title', 'ASC'),]
# @param limit int : Number of records to be returned
# @param offset int: used with limit to choose the start record
# @param instanciate bool : If true, the records are returned as instances, else they are returned as dict
# @return list
# @todo Implement the relations
##@brief returns a selection of documents from the datasource
#@param target Emclass
#@param field_list list
#@param filters list : List of filters
#@param rel_filters list : List of relational filters
#@param order list : List of columns to order by. ex: order = [('title', 'ASC'),]
#@param group list : List of tuples representing the columns used as "group by" fields. ex: group = [('title', 'ASC'),]
#@param limit int : Number of records to be returned
#@param offset int: used with limit to choose the start record
#@param instanciate bool : If true, the records are returned as instances, else they are returned as dict
#@return list
#@todo Implement the relations
def select(self, target, field_list, filters, rel_filters=None, order=None, group=None, limit=None, offset=0, instanciate=True):
collection_name = object_collection_name(target)
collection = self.database[collection_name]
query_filters = parse_query_filters(filters)
query_result_ordering = parse_query_order(order) if order is not None else None
query_filters = self.__process_filters(
target, filters, relational_filters)
query_result_ordering = None
if order is not None:
query_result_ordering = parse_query_order(order)
results_field_list = None if len(field_list) == 0 else field_list
limit = limit if limit is not None else 0
if group is None:
cursor = collection.find(filter=query_filters, projection=results_field_list, skip=offset, limit=limit, sort=query_result_ordering)
cursor = collection.find(
filter=query_filters, projection=results_field_list,
skip=offset, limit=limit, sort=query_result_ordering)
else:
pipeline = list()
unwinding_list = list()
@ -59,7 +76,9 @@ class MongoDbDatasource(object):
pipeline.append({'$match': query_filters})
if results_field_list is not None:
pipeline.append({'$project': SON([{field_name: 1} for field_name in field_list])})
pipeline.append({
'$project': SON([{field_name: 1}
for field_name in field_list])})
pipeline.extend(unwinding_list)
pipeline.append({'$group': grouping_dict})
pipeline.extend({'$sort': SON(sorting_list)})
@ -74,11 +93,12 @@ class MongoDbDatasource(object):
return results
## @brief Deletes one record defined by its uid
# @param target Emclass : class of the record to delete
# @param uid dict|list : a dictionary of fields and values composing the unique identifier of the record or a list of several dictionaries
# @return int : number of deleted records
# @TODO Implement the error management
##@brief Deletes one record defined by its uid
#@param target Emclass : class of the record to delete
#@param uid dict|list : a dictionary of fields and values composing the
# unique identifier of the record or a list of several dictionaries
#@return int : number of deleted records
#@TODO Implement the error management
def delete(self, target, uid):
if isinstance(uid, dict):
uid = [uid]
@ -122,3 +142,181 @@ class MongoDbDatasource(object):
collection = self.database[collection_name]
result = collection.insert_many(datas_list)
return len(result.inserted_ids)
##@brief Fetch the pymongo collection associated to a LeObject child class
#@param leobject LeObject child class (no instance)
#@return the entry of self.database indexed by the collection name of
# leobject
def __collection(self, leobject):
    collection_name = object_collection_name(leobject)
    return self.database[collection_name]
##@brief Perform the subqueries implied by relational filters and merge
# their results into the simple filters
#
#The processing is divided in multiple steps :
# - convert the simple filters into pymongo filters indexed by field name
# - group the relational filters by relational field of the target and by
#   referenced leobject
# - convert each group into the pymongo filters of a subquery
# - execute the subqueries on the referenced collections
# - constrain every relational field with an '$in' on the uids returned by
#   its subqueries (intersected with an already existing '$in' if any)
#
#@param target LeObject subclass (no instance) : Target class
#@param filters list : List of tuple(FIELDNAME, OP, VALUE)
#@param relational_filters list : same composition than filters except
# that FIELD is represented by a tuple(FIELDNAME, {CLASS1:RFIELD1,
# CLASS2:RFIELD2})
#@return a dict of pymongo filters {FIELD:{OPERATOR:VALUE}}
def __process_filters(self, target, filters, relational_filters):
    # Simple filters lodel2 -> pymongo conversion. The result has to be a
    # dict indexed by field name : subquery results are merged into it by
    # field name below.
    op_values = dict()
    for fname, op, value in (filters or []):
        op_values.setdefault(fname, list()).append((op, value))
    res = dict()
    for fname, pairs in op_values.items():
        res[fname] = self.__op_value_listconv(pairs)

    rfilters = self.__prepare_relational_filters(
        target, relational_filters or [])
    # The (op, value) lists of rfilters are converted in place into pymongo
    # filters, so the return value of the helper is not needed here
    self.__subqueries_from_relational_filters(target, rfilters)

    # Executing subqueries, creating filters from result, and injecting
    # them in original filters of the query
    if len(rfilters) > 0:
        logger.debug("Begining subquery execution")
    for fname, subqueries in rfilters.items():
        subq_results = set()
        for leobject, sq_filters in subqueries.items():
            uid_fname = mongo_fieldname(leobject._uid)
            log_msg = ("Subquery running on collection {coll} with filters "
                       "'{filters}'")
            logger.debug(log_msg.format(
                coll=object_collection_name(leobject),
                filters=sq_filters))
            # projection has to be a list (or a dict) of field names : a
            # bare string would be handled as a sequence of 1 char names
            cursor = self.__collection(leobject).find(
                filter=sq_filters,
                projection=[uid_fname])
            subq_results.update(doc[uid_fname] for doc in cursor)
        # Generating the new filter from the subqueries result
        field_filter = res.setdefault(fname, dict())
        if '$in' in field_filter:
            # We already have a IN on this field : both constraints have to
            # hold, so we keep the intersection.
            # NOTE(review): assumes the existing '$in' value is an iterable
            # of uids (not a regex struct) - confirm against callers
            subq_results &= set(field_filter['$in'])
        # An empty list is kept on purpose : '$in': [] matches nothing,
        # while dropping the operator would match every document
        field_filter['$in'] = list(subq_results)
    if len(rfilters) > 0:
        logger.debug("End of subquery execution")
    return res
##@brief Generate subqueries from rfilters tree
#
#Returned struct organization :
# - 1st level keys : relational field name of target
# - 2nd level keys : referenced leobject
# - 3rd level values : pymongo filters (dict)
#
#@note The only caller of this method is __process_filters
#@warning The rfilters argument is modified by reference : each list of
#tuple(op, value) is replaced by the corresponding pymongo filter
#
#@param target LeObject subclass (no instance) : Target class
#@param rfilters dict : A struct as returned by
#MongoDbDatasource.__prepare_relational_filters()
#@return the rfilters argument itself (same object), once converted
def __subqueries_from_relational_filters(self, target, rfilters):
    for by_leobject in rfilters.values():
        for by_rfield in by_leobject.values():
            for rfield, op_values in by_rfield.items():
                # Converting one list at a time is not optimized but
                # allows to trigger warnings in some case (2 different
                # values for a same op on a same field on a same
                # collection). Only existing keys are reassigned, which
                # is safe while iterating.
                by_rfield[rfield] = self.__op_value_listconv(op_values)
    return rfilters
##@brief Generate a tree from relational_filters
#
#The generated struct is a dict with :
# - 1st level keys : relational field name of target
# - 2nd level keys : referenced leobject
# - 3rd level keys : referenced field in referenced class
# - 4th level values : list of tuple(op, value)
#
#@note The only caller of this method is __process_filters
#@warning An assertion is done : if two leobject are stored in the same
#collection they share the same uid
#
#@param target LeObject subclass (no instance) : Target class (not used
#by the grouping itself)
#@param relational_filters list|None : same composition than filters
#except that FIELD is represented by a tuple(FIELDNAME, {CLASS1:RFIELD1,
#CLASS2:RFIELD2}). None is handled as an empty list
#@return a struct as described above
def __prepare_relational_filters(self, target, relational_filters):
    # We are going to regroup relational filters by reference field
    # then by collection
    rfilters = dict()
    if relational_filters is None:
        return rfilters
    # For each relational field, stores the representative leobject
    # associated to a collection name. It is kept across filters so that
    # several filters on a same relational field accumulate instead of
    # overwriting each other.
    leo_collname = dict()
    for (fname, rfields), op, value in relational_filters:
        by_leobject = rfilters.setdefault(fname, dict())
        representatives = leo_collname.setdefault(fname, dict())
        # WARNING ! Here we assert that all leobject that are stored
        # in a same collection are identified by the same field
        for leobject, rfield in rfields.items():
            # Here we are filling a dict with leobject as index but
            # we are doing a UNIQ on collection name
            cur_collname = object_collection_name(leobject)
            if cur_collname not in representatives:
                representatives[cur_collname] = leobject
                by_leobject[leobject] = dict()
            # Fetching the collection's representative leobject
            repr_leo = representatives[cur_collname]
            by_leobject[repr_leo].setdefault(rfield, list()).append(
                (op, value))
    return rfilters
##@brief Convert lodel2 operator and value to pymongo struct
#
#Conversion is done using MongoDbDatasource::lodel2mongo_op_map
#@param op str : take value in LeFilteredQuery::_query_operators
#@param value mixed : the value. For operators listed in mongo_op_re a
#str value is handled as a wildcarded ('*') string, other types are left
#untouched
#@return a tuple(mongo_op, mongo_value)
#@throw MongoDbDataSourceError if op is not a key of lodel2mongo_op_map
def __op_value_conv(self, op, value):
    if op not in self.lodel2mongo_op_map:
        msg = "Invalid operator '%s' found" % op
        raise MongoDbDataSourceError(msg)
    mongop = self.lodel2mongo_op_map[op]
    mongoval = value
    # Converting lodel2 wildcarded string into a case insensitive
    # mongodb re. Only str values can hold wildcards.
    if mongop in self.mongo_op_re and isinstance(value, str):
        # unescaping \
        mongoval = value.replace('\\\\', '\\')
        if not mongoval.startswith('*'):
            mongoval = '^' + mongoval
        # The string is open ended only if it ends with an unescaped * ;
        # endswith() is used so that a lone '*' does not raise IndexError
        open_ended = (mongoval.endswith('*')
                      and not mongoval.endswith('\\*'))
        if not open_ended:
            mongoval += '$'
        # Replacing every other unescaped wildcard char
        mongoval = self.wildcard_re.sub('.*', mongoval)
        mongoval = {'$regex': mongoval, '$options': 'i'}
    # The mongodb operator is returned (not the lodel2 one) : callers use
    # it as key of the pymongo filter
    return (mongop, mongoval)
##@brief Build a pymongo filter dict from a list of tuple(OP, VALUE)
#
#Each couple is converted with __op_value_conv. When several couples lead
#to the same mongo operator only the first one is kept and a warning is
#emitted for each of the others.
#@param op_value_list list : list of tuple(OP, VALUE)
#@return a dict with mongo operators as keys and mongo values as values
def __op_value_listconv(self, op_value_list):
    converted = dict()
    for lodel_op, raw_value in op_value_list:
        mongo_op, mongo_value = self.__op_value_conv(lodel_op, raw_value)
        if mongo_op not in converted:
            converted[mongo_op] = mongo_value
            continue
        warnings.warn(
            "Duplicated value given for a single "
            "field/operator couple in a query. We will keep only the first one")
    return converted