背景
python項目中使用了peewee這款orm框架,在對數據庫更新時有兩種語法,分別是 save 和 update 方法。
有同事說從peewee的日誌來看,update比save更快,於是做了一個簡單的比較實驗,看看真實情況如何。
基礎環境:
python: 3.8.10
peewee: 3.16.2
數據庫:sqlite
準備
插入1w條數據
import datetime
from peewee import AutoField, DateTimeField, Model, SqliteDatabase, TextField, IntegerField
# Database handle shared by every model. It must exist before the model
# classes are declared, because ``Meta.database`` is read at class-creation
# time. The file name is arbitrary for this experiment.
db = SqliteDatabase("peewee_test.db")


class BaseModel(Model):
    """A base model that will use our Sqlite database."""

    # Auto-incrementing integer primary key.
    id = AutoField()
    # The function itself (not its result) is passed as the default, so the
    # timestamp is computed when a default is needed rather than at import.
    update_time = DateTimeField(default=datetime.datetime.now)

    class Meta:
        database = db
class User(BaseModel):
    """Model for the rows used in the benchmark, stored in the "user" table."""

    # Free-form text name of the person.
    name = TextField()
    # Integer age of the person.
    age = IntegerField()

    class Meta:
        # Explicit table name for this model.
        table_name = "user"
if __name__ == "__main__":
    # Local import keeps this snippet self-contained; ``chunked`` splits an
    # iterable into fixed-size batches.
    from peewee import chunked

    # Connect and create the table first: truncating before the table exists
    # fails on a fresh database, and touching the model before an explicit
    # connect() opens the connection implicitly, which makes the later
    # connect() call fail.
    db.connect()
    db.create_tables([User])
    # Start from an empty table so repeated runs always benchmark 10000 rows.
    User.truncate_table()

    data = [{"name": f"person_P{i}", "age": i} for i in range(10000)]

    # Insert in small batches inside one transaction: SQLite caps the number
    # of bound variables per statement, and a single transaction avoids one
    # commit per batch.
    with db.atomic():
        for batch in chunked(data, 100):
            User.insert_many(batch).execute()
update 更新
if __name__ == "__main__":
    import logging
    import time

    # Send peewee's DEBUG log (the executed SQL and its parameters) to the
    # console, without also passing it to the root logger's handlers.
    logger = logging.getLogger('peewee')
    logger.propagate = False
    logger.addHandler(logging.StreamHandler())
    logger.setLevel(logging.DEBUG)

    # Rows to update; iterating the query in the loop below executes it.
    users = User.select()

    # perf_counter() is monotonic, so the measured interval cannot be skewed
    # by system clock adjustments.
    start = time.perf_counter()
    for user in users:
        # One UPDATE statement per row, setting only the ``age`` column.
        User.update(age=6012).where(User.id == user.id).execute()
        # ``user`` is the instance loaded by the SELECT; this loop never
        # assigns to it, so the age printed here is the previously stored one.
        print(user.id, user.name, user.age)
    end = time.perf_counter()
    print(f"total: {end-start}")
peewee日誌(注意:這段日誌更新了全部字段,且 age 爲 12341,與下文 save 示例中的賦值一致,應是 save 方法產生的輸出):
('UPDATE "user" SET "update_time" = ?, "name" = ?, "age" = ? WHERE ("user"."id" = ?)', [datetime.datetime(2023, 8, 29, 17, 30, 36, 719081), 'person_P9996', 12341, 10024])
('UPDATE "user" SET "update_time" = ?, "name" = ?, "age" = ? WHERE ("user"."id" = ?)', [datetime.datetime(2023, 8, 29, 17, 30, 36, 719088), 'person_P9997', 12341, 10025])
('UPDATE "user" SET "update_time" = ?, "name" = ?, "age" = ? WHERE ("user"."id" = ?)', [datetime.datetime(2023, 8, 29, 17, 30, 36, 719096), 'person_P9998', 12341, 10026])
('UPDATE "user" SET "update_time" = ?, "name" = ?, "age" = ? WHERE ("user"."id" = ?)', [datetime.datetime(2023, 8, 29, 17, 30, 36, 719103), 'person_P9999', 12341, 10027])
結果:67.96582674980164 s
save更新
if __name__ == "__main__":
    import logging
    import time

    # Send peewee's DEBUG log (the executed SQL and its parameters) to the
    # console, without also passing it to the root logger's handlers.
    logger = logging.getLogger('peewee')
    logger.propagate = False
    logger.addHandler(logging.StreamHandler())
    logger.setLevel(logging.DEBUG)

    # Rows to update; iterating the query in the loop below executes it.
    users = User.select()

    # perf_counter() is monotonic, so the measured interval cannot be skewed
    # by system clock adjustments.
    start = time.perf_counter()
    for user in users:
        # Modify the loaded instance and persist it; the instance already has
        # a primary key, so save() issues an UPDATE rather than an INSERT.
        user.age = 12341
        user.save()
        print(user.id, user.name, user.age)
    end = time.perf_counter()
    print(f"total: {end-start}")
peewee日誌(注意:這段日誌只更新了 age 一個字段,應是 update 方法產生的輸出;其中的數值與示例代碼不一致,應來自另一次運行):
('UPDATE "user" SET "age" = ? WHERE ("user"."id" = ?)', [2000, 10025])
10025 person_P9997 3000
('UPDATE "user" SET "age" = ? WHERE ("user"."id" = ?)', [2000, 10026])
10026 person_P9998 3000
('UPDATE "user" SET "age" = ? WHERE ("user"."id" = ?)', [2000, 10027])
結果:67.52418804168701 s
結果分析
從三個方面來分析:
- 從打印的日誌來看,save 會更新記錄的全部字段,update 只會更新指定的字段。
- 從結果來看,1w行以內的更新操作,兩者沒有性能上的差別。
- 從方法實現來看,update 方法是底層方法,save 方法調用了 update 方法或 insert 方法來實現更新操作。
所以理論上來說 update 比 save 更底層,效率略高。實際使用中 save 寫法較爲方便,個人更喜歡 save 方法。
update 方法
def __sql__(self, ctx):
    """Render this UPDATE query into ``ctx``.

    Emits ``UPDATE <table> SET <col = value, ...>`` followed by optional
    ``FROM`` and ``WHERE`` clauses, then applies ordering and returns the
    result of ``apply_returning(ctx)``.
    """
    super(Update, self).__sql__(ctx)
    with ctx.scope_values(subquery=True):
        ctx.literal('UPDATE ')
        # Build one "column = value" node per entry of the update mapping,
        # in the order given by ctx.column_sort_key.
        expressions = []
        for k, v in sorted(self._update.items(), key=ctx.column_sort_key):
            if not isinstance(v, Node):
                # Plain Python value: convert it via the field when the key
                # is a Field, otherwise wrap it as a Value.
                if isinstance(k, Field):
                    v = k.to_value(v)
                else:
                    v = Value(v, unpack=False)
            elif isinstance(v, Model) and isinstance(k, ForeignKeyField):
                # NB: we want to ensure that when passed a model instance
                # in the context of a foreign-key, we apply the fk-specific
                # adaptation of the model.
                v = k.to_value(v)
            # Anything that is not a Value is passed through qualify_names.
            if not isinstance(v, Value):
                v = qualify_names(v)
            expressions.append(NodeList((k, SQL('='), v)))
        (ctx
         .sql(self.table)
         .literal(' SET ')
         .sql(CommaNodeList(expressions)))
        # Optional FROM clause, rendered without surrounding parentheses.
        if self._from:
            with ctx.scope_source(parentheses=False):
                ctx.literal(' FROM ').sql(CommaNodeList(self._from))
        # Optional WHERE clause.
        if self._where:
            with ctx.scope_normal():
                ctx.literal(' WHERE ').sql(self._where)
        self._apply_ordering(ctx)
        return self.apply_returning(ctx)
可以看到,update 方法是拼接出一條 SQL 語句:UPDATE xx SET xx=xx WHERE xx。
save 方法
def save(self, force_insert=False, only=None):
    """Persist this instance with an UPDATE or an INSERT.

    An UPDATE filtered by the primary key is issued when the instance has a
    primary-key value and ``force_insert`` is false; otherwise an INSERT is
    issued.

    Args:
        force_insert: When true, INSERT even if a primary-key value is set.
        only: If given, the saved fields are pruned to these via
            ``_prune_fields``.

    Returns:
        The result of executing the UPDATE, ``1`` after an INSERT, or
        ``False`` when ``only_save_dirty`` is enabled and no field is dirty.

    Raises:
        ValueError: On the UPDATE path, if no fields remain once the
            primary-key field(s) have been removed.
    """
    # Work on a copy so pruning and popping below never alter the instance data.
    field_dict = self.__data__.copy()
    if self._meta.primary_key is not False:
        pk_field = self._meta.primary_key
        pk_value = self._pk
    else:
        # Model declared without a primary key.
        pk_field = pk_value = None
    if only is not None:
        field_dict = self._prune_fields(field_dict, only)
    elif self._meta.only_save_dirty and not force_insert:
        # Restrict the write to fields marked dirty; nothing dirty means
        # there is nothing to save.
        field_dict = self._prune_fields(field_dict, self.dirty_fields)
        if not field_dict:
            self._dirty.clear()
            return False
    self._populate_unsaved_relations(field_dict)
    rows = 1
    # Auto-increment key without a value yet: leave it out of the written fields.
    if self._meta.auto_increment and pk_value is None:
        field_dict.pop(pk_field.name, None)
    if pk_value is not None and not force_insert:
        # UPDATE path: the key goes into the WHERE clause, so drop the
        # primary-key field(s) from the SET list.
        if self._meta.composite_key:
            for pk_part_name in pk_field.field_names:
                field_dict.pop(pk_part_name, None)
        else:
            field_dict.pop(pk_field.name, None)
        if not field_dict:
            raise ValueError('no data to save!')
        rows = self.update(**field_dict).where(self._pk_expr()).execute()
    elif pk_field is not None:
        # INSERT path for a model with a primary key; store the key returned
        # by the insert on the instance when applicable.
        pk = self.insert(**field_dict).execute()
        if pk is not None and (self._meta.auto_increment or
                               pk_value is None):
            self._pk = pk
            # Although we set the primary-key, do not mark it as dirty.
            self._dirty.discard(pk_field.name)
    else:
        # INSERT path for a model without a primary key.
        self.insert(**field_dict).execute()
    self._dirty -= set(field_dict)  # Remove any fields we saved.
    return rows
save方法是調用update方法或insert方法間接實現更新。