MongoDB - How could I remove duplicated items (complex objects) from an array?


In each document, the `records` array contains many duplicated objects,
and within `buy_items` there are many duplicated items.

How can I clean up the duplicated items?

Original document:

{
  "_id": "0005d116qwwewdq82a1b84f148fa6027d429f3e",
  "records": [
    {
      "date": new Date("1996-02-08T08:00:00+0800"),
      "buy_items": [ "5210 ", "5210 ", "5210 " ]
    },
    {
      "date": new Date("1996-02-08T08:00:00+0800"),
      "buy_items": [ "5210 ", "5210 ", "5210 " ]
    },
    {
      "date": new Date("2012-12-08T08:00:00+0800"),
      "buy_items": [ "5210 ", "1234 ", " " ]
    }
  ]
}

Expected output:

{
  "_id": "0005d116qwwewdq82a1b84f148fa6027d429f3e",
  "records": [
    {
      "date": new Date("1996-02-08T08:00:00+0800"),
      "buy_items": [ "5210 " ]
    },
    {
      "date": new Date("2012-12-08T08:00:00+0800"),
      "buy_items": [ "5210 ", "1234 ", " " ]
    }
  ]
}

With Michael's solution, the output might look like this (the records are merged into one):

{
  "_id": "0005d116qwwewdq82a1b84f148fa6027d429f3e",
  "records": [
    {
      "date": new Date("1996-02-08T08:00:00+0800"),
      "buy_items": [ "5210 ", "1234 ", " " ]
    }
  ]
}

You can remove duplicated objects using the aggregation framework:

// Deduplicate buy_items per (document, date) using the aggregation framework,
// then overwrite the collection with the cleaned documents.
db.collection.aggregate([
    // Flatten each record, then each purchased item, into its own document.
    { $unwind: "$records" },
    { $unwind: "$records.buy_items" },
    // $addToSet keeps only the distinct items for each (_id, date) pair.
    // NOTE: operator casing matters in the shell — "$addtoset" would fail.
    { $group: {
        "_id": { id: "$_id", date: "$records.date" },
        buy_items: { $addToSet: "$records.buy_items" }
    }},
    // Reassemble the records array for each original document.
    { $group: {
        "_id": "$_id.id",
        records: { $push: { "date": "$_id.date", "buy_items": "$buy_items" } }
    }},
    { $sort: { "records.0.date": 1 } },
    // $out writes the result to the named collection, replacing it if it exists.
    { $out: "collection" }
]);

The $out operator lets you write the aggregation result to a specified collection, replacing that collection if it already exists.


Even better, use "Bulk" operations so only the affected documents are updated in place:

// Deduplicate buy_items and write the cleaned records back with Bulk updates,
// avoiding a full-collection rewrite via $out.
var bulk = db.collection.initializeOrderedBulkOp();
var count = 0;

db.collection.aggregate([
    { "$unwind": "$records" },
    // $setIntersection over the single array yields its distinct elements.
    { "$project": {
        "date": "$records.date",
        "buy_items": { "$setIntersection": "$records.buy_items" }
    }},
    { "$unwind": "$buy_items" },
    // Collect the distinct items per (document, date) pair.
    { "$group": {
        "_id": { "id": "$_id", "date": "$date" },
        "buy_items": { "$addToSet": "$buy_items" }
    }},
    // Rebuild the records array for each original document.
    { "$group": {
        "_id": "$_id.id",
        "records": { "$push": {
            "date": "$_id.date",
            "buy_items": "$buy_items"
        }}
    }}
]).forEach(function(doc) {
    bulk.find({ "_id": doc._id }).updateOne({
        "$set": { "records": doc.records }
    });
    count++;
    // Flush every 500 queued updates to keep the batch size bounded.
    if (count % 500 === 0) {
        bulk.execute();
        bulk = db.collection.initializeOrderedBulkOp();
    }
});

// Flush any remaining queued updates.
if (count % 500 !== 0) {
    bulk.execute();
}

Result:

{
  "_id": "0005d116qwwewdq82a1b84f148fa6027d429f3e",
  "records": [
    {
      "date": ISODate("2012-12-08T00:00:00Z"),
      "buy_items": [ " ", "1234 ", "5210 " ]
    },
    {
      "date": ISODate("1996-02-08T00:00:00Z"),
      "buy_items": [ "5210 " ]
    }
  ]
}

Comments