In each document,
the records
array contains many duplicated objects,
and within buy_items
there are also many duplicated items.
How can I clean up the duplicated items?
original documents:
{ "_id": "0005d116qwwewdq82a1b84f148fa6027d429f3e", "records": [ { "date": new Date("1996-02-08T08:00:00+0800"), "buy_items": [ "5210 ", "5210 ", "5210 " ] }, { "date": new Date("1996-02-08T08:00:00+0800"), "buy_items": [ "5210 ", "5210 ", "5210 " ] }, { "date": new Date("2012-12-08T08:00:00+0800"), "buy_items": [ "5210 ", "1234 ", " " ] } ] }
expected output:
{ "_id": "0005d116qwwewdq82a1b84f148fa6027d429f3e", "records": [ { "date": new Date("1996-02-08T08:00:00+0800"), "buy_items": [ "5210 " ] }, { "date": new Date("2012-12-08T08:00:00+0800"), "buy_items": [ "5210 ", "1234 ", " " ] } ] }
With Michael's solution, the output might look like this:
{ "_id": "0005d116qwwewdq82a1b84f148fa6027d429f3e", "records": [ "date": new date("1996-02-08t08:00:00+0800"), "buy_items": [ "5210 " "1234 ", " " ] ] }
You can remove the duplicated objects using the aggregation framework:
// Deduplicate the `records` array and each record's `buy_items`, then write
// the result over the collection named in the final `$out` stage.
// Operator names are case-sensitive: the accumulator is `$addToSet`.
db.collection.aggregate([
  // Emit one document per element of `records`.
  { $unwind: "$records" },
  // Emit one document per element of that record's `buy_items`.
  { $unwind: "$records.buy_items" },
  // Records sharing the same (document id, date) collapse into one group;
  // $addToSet keeps each distinct buy_items value only once.
  {
    $group: {
      _id: { id: "$_id", date: "$records.date" },
      buy_items: { $addToSet: "$records.buy_items" },
    },
  },
  // Rebuild one output document per original `_id`, re-assembling `records`.
  {
    $group: {
      _id: "$_id.id",
      records: { $push: { date: "$_id.date", buy_items: "$buy_items" } },
    },
  },
  { $sort: { "records.0.date": 1 } },
  // Writes the pipeline result into "collection" (must be the last stage).
  { $out: "collection" },
]);
The $out
operator lets you write the aggregation result to a specified collection, or replace an existing collection.
Even better, use "bulk" operations:
// Deduplicate `records` / `buy_items` with an aggregation, then write each
// rebuilt `records` array back to its document using ordered bulk updates.
// Updates are flushed to the server in batches of 500.
var bulk = db.collection.initializeOrderedBulkOp();
var count = 0;

db.collection.aggregate([
  // Emit one document per element of `records`.
  { "$unwind": "$records" },
  {
    "$project": {
      "date": "$records.date",
      // NOTE(review): presumably meant to dedupe buy_items within each record
      // before unwinding; confirm the single-argument form on your server.
      "buy_items": { "$setIntersection": "$records.buy_items" }
    }
  },
  // Emit one document per element of `buy_items`.
  { "$unwind": "$buy_items" },
  // Records sharing the same (document id, date) collapse into one group;
  // $addToSet keeps each distinct item only once.
  {
    "$group": {
      "_id": { "id": "$_id", "date": "$date" },
      "buy_items": { "$addToSet": "$buy_items" }
    }
  },
  // Rebuild the `records` array for each original document id.
  {
    "$group": {
      "_id": "$_id.id",
      "records": {
        "$push": { "date": "$_id.date", "buy_items": "$buy_items" }
      }
    }
  }
]).forEach(function (doc) {
  // Queue an update that replaces only the `records` field of this document.
  bulk.find({ "_id": doc._id }).updateOne({ "$set": { "records": doc.records } });
  count++;

  // Flush every 500 queued operations and start a fresh bulk builder.
  if (count % 500 === 0) {
    bulk.execute();
    bulk = db.collection.initializeOrderedBulkOp();
  }
});

// Flush whatever is left over from the last, partially filled batch.
if (count % 500 !== 0) {
  bulk.execute();
}
result:
{ "_id" : "0005d116qwwewdq82a1b84f148fa6027d429f3e", "records" : [ { "date" : ISODate("2012-12-08T00:00:00Z"), "buy_items" : [ " ", "1234 ", "5210 " ] }, { "date" : ISODate("1996-02-08T00:00:00Z"), "buy_items" : [ "5210 " ] } ] }
Comments
Post a Comment