EntryDataVacuate.py
23.5 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
from osgeo.ogr import *
from osgeo import ogr
from osgeo import gdal
import os
import uuid
import shutil
from app.modules.data.models import *
from app.util.component.PGUtil import PGUtil
from app.util.component.StructurePrint import StructurePrint
from sqlalchemy.orm import Session
import configure
import traceback
import copy
from app.util.component.GeometryAdapter import GeometryAdapter
from app.util.component.VacuateConf import VacuateConf
import datetime
class EntryDataVacuate:
    """Import uploaded vector datasets into the task's PostGIS datasource.

    Shapefiles are imported as a single layer; any other path is opened with
    the OpenFileGDB driver and every layer listed in ``meta["layer"]`` is
    imported.  When the task parameter ``vacuate`` is truthy a
    ``VacuateProcess`` is fed with every imported geometry as well.
    """

    def entry(self, parameter):
        """Run one import task.

        Keys read from ``parameter`` in this method: ``meta`` (list of
        per-dataset dicts) and ``encoding``.  Each ``meta`` entry provides
        ``data_path`` and optionally ``encoding``.

        All datasets are imported inside one transaction: the task is
        committed only if every dataset succeeded, otherwise it is rolled
        back.  The upload folder is removed afterwards in every case.

        :param parameter: task parameter dict
        :return: None
        """
        # Initialise the task
        this_task = ThisTask(parameter)
        this_task.write_process("入库任务初始化...")
        # Path of the first dataset; only used to locate the folder to delete
        _data_path = None
        try:
            metas: list = parameter.get("meta")
            # Overall result of the whole import
            is_success = True
            this_task.update({"process": "入库中"})
            # Start the import transaction
            this_task.start()

            # Import the datasets one after another
            for meta in metas:
                # Requested encoding, falling back to GBK for anything unexpected
                encoding = parameter.get("encoding")
                if encoding not in ("GBK", "gbk", "UTF-8", "utf-8"):
                    encoding = "GBK"
                gdal.SetConfigOption("SHAPE_ENCODING", encoding)
                # An encoding declared by a .cpg file takes precedence
                encoding_cpg = meta.get("encoding")
                if encoding_cpg:
                    gdal.SetConfigOption("SHAPE_ENCODING", encoding_cpg)

                data_path = meta.get("data_path")
                # Remember the first path for the cleanup step
                if not _data_path:
                    _data_path = data_path
                if not data_path:
                    raise Exception("数据错误!")

                # Two kinds of input: a shapefile or a file geodatabase
                if data_path.endswith("shp"):
                    is_success_one, _ = self.entry_shp(data_path, this_task, meta)
                else:
                    is_success_one, _ = self.entry_gdb(data_path, this_task, meta)

                # One failed dataset fails the whole import
                if not is_success_one:
                    is_success = False

            this_task.write_process("数据入库结束。")

            if is_success:
                # Mark the task as successful
                this_task.update({"state": 1, "process": "入库完成", "update_time": datetime.datetime.now()})
                try:
                    this_task.commit()
                except Exception as commit_error:
                    # Keep the original cause attached for debugging
                    raise Exception("可能编码出错!") from commit_error
            else:
                # Mark the task as failed and undo everything
                this_task.update({"state": -1, "process": "入库失败", "update_time": datetime.datetime.now()})
                this_task.rollback()
        except Exception as e:
            StructurePrint().print("herehere")
            this_task.write_process("{} 任务结束!".format(str(e)))
            this_task.update({"state": -1, "process": "入库失败", "update_time": datetime.datetime.now()})
            StructurePrint().print(str(e), "ERROR")
            this_task.rollback()
        finally:
            try:
                this_task.end()
            finally:
                # Always attempt the cleanup, even if closing the task failed
                self._remove_upload_dir(_data_path)

    @staticmethod
    def _remove_upload_dir(data_path):
        """Best-effort removal of the upload folder that contains ``data_path``.

        Walks up from ``data_path`` until the parent directory is the
        ``file_tmp`` folder (at most 30 steps) and deletes that directory.
        Failures are logged and never raised.
        """
        try:
            if not data_path:
                raise Exception("找不到文件!")
            file_tmp_path = os.path.join(data_path.split("file_tmp")[0], "file_tmp")
            dir_path = os.path.dirname(data_path)
            steps = 0
            while os.path.dirname(dir_path) != file_tmp_path and steps < 30:
                dir_path = os.path.dirname(dir_path)
                steps += 1
            if steps >= 30:
                raise Exception("找不到文件!")
            shutil.rmtree(dir_path, True)
            StructurePrint().print("删除文件成功!")
        except Exception as e:
            StructurePrint().print(str(e), "ERROR")
            StructurePrint().print("删除文件失败!", "ERROR")

    def entry_shp(self, data_path, this_task, meta):
        """Import the single layer of a shapefile.

        :param data_path: path of the ``.shp`` file
        :param this_task: the running ``ThisTask``
        :param meta: dataset description; ``meta["layer"]`` maps source layer
            names to target names
        :return: ``(success, new_layer_name)``
        :raises Exception: when the file cannot be opened
        """
        driver: Driver = ogr.GetDriverByName("ESRI Shapefile")
        ds: DataSource = driver.Open(data_path, 0)
        # Identity test: the handle is None when opening failed
        if ds is None:
            raise Exception("打开数据失败!")
        try:
            layer: Layer = ds.GetLayer(0)
            return self.entry_one_layer(layer, this_task, meta)
        finally:
            # Release the source even if the import raised
            ds.Destroy()

    def entry_gdb(self, data_path, this_task, meta):
        """Import the selected layers of a file geodatabase.

        Only layers whose name is a key of ``meta["layer"]`` are imported.

        :param data_path: path of the ``.gdb`` folder
        :param this_task: the running ``ThisTask``
        :param meta: dataset description
        :return: ``(success, new_layer_names)``; ``success`` is False as soon
            as one layer failed
        :raises Exception: when the geodatabase cannot be opened
        """
        is_successes = []
        new_layer_names = []
        driver: Driver = ogr.GetDriverByName("OpenFileGDB")
        ds: DataSource = driver.Open(data_path, 0)
        if ds is None:
            raise Exception("打开数据失败!")
        try:
            for i in range(ds.GetLayerCount()):
                layer: Layer = ds.GetLayer(i)
                if layer.GetName() not in meta.get("layer"):
                    continue
                is_success, new_layer_name = self.entry_one_layer(layer, this_task, meta)
                new_layer_names.append(new_layer_name)
                is_successes.append(is_success)
        finally:
            ds.Destroy()
        return all(is_successes), new_layer_names

    def entry_one_layer(self, layer: Layer, this_task, meta):
        """Copy one source layer into the task's PostGIS datasource.

        Task parameters read here: ``vacuate``, ``overwrite``, ``geom_name``,
        ``fid`` and ``creator``.  Any failure is logged to the task process
        log and reported through the return value instead of being raised.

        :param layer: source OGR layer
        :param this_task: the running ``ThisTask``
        :param meta: dataset description; ``meta["layer"]`` maps the source
            layer name to the target layer name
        :return: ``(success, new_layer_name)``
        """
        new_layer_name = None
        vacuate_process = None
        vacuate = int(this_task.parameter.get("vacuate", 0))
        success = True
        table_guid = str(uuid.uuid1())

        try:
            # Layer creation settings
            parameter = this_task.parameter
            overwrite = "yes" if parameter.get("overwrite") == "yes" else "no"
            geom_name = parameter.get("geom_name") if parameter.get("geom_name") is not None else "geom"
            fid = parameter.get("fid") if parameter.get("fid") is not None else "fid"
            options = ["OVERWRITE={}".format(overwrite), "FID={}".format(fid),
                       "GEOMETRY_NAME={}".format(geom_name), "PRECISION=NO", "LAUNDER=NO"]

            # Promote line/polygon types to their multi-part counterparts
            geom_type = GeometryAdapter.change_geom_type(layer.GetGeomType())

            # Target layer name; gets a numeric suffix when it already exists
            # and overwriting is not allowed
            change_name = False
            new_layer_name: str = meta.get("layer").get(layer.GetName())
            origin_name = new_layer_name
            no = 1
            while overwrite == "no" and this_task.pg_ds.GetLayerByName(new_layer_name) is not None:
                change_name = True
                new_layer_name = origin_name + "_{}".format(no)
                no += 1
            if change_name:
                this_task.write_process("{}图层已存在,更名为{}入库".format(origin_name, new_layer_name))
            this_task.write_process("{}图层正在入库...".format(new_layer_name))

            pg_layer: Layer = this_task.pg_ds.CreateLayer(new_layer_name, layer.GetSpatialRef(), geom_type, options)
            if pg_layer is None:
                # Fail with a readable reason instead of an AttributeError below
                raise Exception("创建图层{}失败!".format(new_layer_name))

            # Copy the source attribute schema, leaving out the fid field
            schema = [sche for sche in layer.schema if sche.name.lower() != fid]
            pg_layer.CreateFields(schema)

            # Set up the thinning process
            if vacuate:
                vacuate_process = VacuateProcess(layer, table_guid, options, this_task.database.sqlalchemy_uri)

            count = 0
            for feature in layer:
                count += 1
                if count % 10000 == 0:
                    StructurePrint().print("{}图层已入库{}个对象".format(new_layer_name, count))

                geo: Geometry = feature.GetGeometryRef()
                # Features with an empty geometry are skipped
                if geo is not None and geo.IsEmpty():
                    skip_message = "FID:{}要素的空间字段为空,跳过该要素!".format(feature.GetFID())
                    this_task.write_process(skip_message)
                    StructurePrint().print(skip_message, "WARN")
                    continue

                # Clone() yields an independent feature, so the source feature
                # that owns ``geo`` is left untouched while we edit the copy
                out_feature: Feature = feature.Clone()
                out_geom = None
                if geo is not None:
                    out_geom: Geometry = GeometryAdapter.change_geom(geo, geom_type)
                    out_feature.SetGeometry(out_geom)
                # A fid of 0 frequently causes problems, so shift by one
                out_feature.SetFID(out_feature.GetFID() + 1)
                pg_layer.CreateFeature(out_feature)

                # Feed the thinned layers
                if out_geom is not None and vacuate_process is not None:
                    vacuate_process.vacuate(out_geom, feature)

            # NOTE(review): VacuateProcess.set_vacuate_count() is not called
            # anywhere in the visible code, so "_dcigrid_count_" is never
            # filled - confirm whether it should run here.

            # Register the layer, recording whether thinned layers exist
            if vacuate_process is not None:
                is_vacuate = 1 if vacuate_process.max_level > 0 else 0
            else:
                is_vacuate = 0
            this_task.register_table(pg_layer, parameter.get("creator"), is_vacuate, table_guid)
            # Register the thinned tables
            if vacuate_process is not None:
                this_task.register_table_vacuate(table_guid, vacuate_process.vacuate_layers)
            this_task.write_process("{}图层入库成功。".format(new_layer_name))
        except Exception as e:
            this_task.write_process("{}入库失败,数据回滚!原因:{}".format(new_layer_name, str(e)))
            StructurePrint().print("{}入库失败,数据回滚!原因:{}".format(new_layer_name, str(e)), "error")
            print(traceback.format_exc())
            # Roll back the thinned layers; the process may not exist yet if
            # the failure happened before it was created
            if vacuate_process is not None:
                vacuate_process.rollback()
            success = False
        finally:
            if vacuate_process is not None:
                vacuate_process.end()
        return success, new_layer_name
class ThisTask:
    """State of one import task: two database sessions and the target datasource.

    ``sys_session`` carries the metadata rows that belong to the import
    transaction, ``process_session`` is committed immediately so progress
    messages survive a rollback, and ``pg_ds`` is the OGR datasource the
    layers are written to.
    """

    def __init__(self, parameter):
        """Open both sessions and the target datasource.

        Keys read from ``parameter``: ``task_guid``, ``database_guid`` and
        ``catalog_guid``.

        :param parameter: task parameter dict
        :raises Exception: when a session cannot be opened or the database
            record does not exist
        """
        try:
            # Session that belongs to the import transaction
            self.sys_session: Session = PGUtil.get_db_session(configure.SQLALCHEMY_DATABASE_URI)
            # Dedicated session for writing progress messages
            self.process_session: Session = PGUtil.get_db_session(configure.SQLALCHEMY_DATABASE_URI)
        except Exception as e:
            raise Exception("打开数据库失败!") from e
        self.parameter = parameter
        # Kept as a query (not a row) so update() can issue a bulk UPDATE
        self.task = self.process_session.query(Task).filter_by(guid=parameter.get("task_guid"))
        self.database = self.sys_session.query(Database).filter_by(
            guid=parameter.get("database_guid")).one_or_none()
        if self.database is None:
            # one_or_none() found nothing; fail before dereferencing the row
            raise Exception("数据库不存在!")
        self.catalog_guid = parameter.get("catalog_guid")
        self.pg_ds: DataSource = PGUtil.open_pg_data_source(1, DES.decode(self.database.sqlalchemy_uri))

    def start(self):
        """Begin the transaction on the target datasource."""
        self.pg_ds.StartTransaction()

    def update(self, update_dict):
        """Update the task record with ``update_dict`` and commit at once."""
        self.task.update(update_dict)
        self.process_session.commit()

    def write_process(self, message):
        """Store a timestamped progress message for this task and commit it."""
        message = "{} {}".format(datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S'), message)
        task_process = Process(guid=str(uuid.uuid1()), message=message, time=datetime.datetime.now(),
                               task_guid=self.parameter.get("task_guid"))
        self.process_session.add(task_process)
        self.process_session.commit()

    def register_table(self, layer: "Layer", creator, is_vacuate, table_guid):
        """Add the metadata rows (table and columns) for an imported layer.

        Existing ``Table`` rows with the same name in the same database are
        deleted first.  Nothing is committed here.

        :param layer: the layer that was written to the target datasource
        :param creator: value stored as the table's creator
        :param is_vacuate: 1 when thinned layers were created, else 0
        :param table_guid: guid to use for the new table row
        :return: ``table_guid``
        """
        this_time = datetime.datetime.now()
        table_name = layer.GetName()

        ext = layer.GetExtent()
        # Small coordinates keep 6 decimals, large ones 2
        # (presumably degrees vs. projected units - confirm)
        digits = 6 if ext[0] < 360 else 2
        ext = [round(e, digits) for e in ext]
        extent = "{},{},{},{}".format(ext[0], ext[1], ext[2], ext[3])

        geom_type = GeometryAdapter.get_geometry_type(layer)
        table = Table(guid=table_guid,
                      database_guid=self.database.guid,
                      creator=creator,
                      name=table_name,
                      create_time=this_time, update_time=this_time,
                      catalog_guid=self.catalog_guid, table_type=GeometryAdapter.get_table_type(geom_type),
                      extent=extent,
                      feature_count=layer.GetFeatureCount(),
                      is_vacuate=is_vacuate
                      )

        # Remove leftover rows of an earlier import; if the session is no
        # longer usable, open a fresh one and retry once
        try:
            history_table = self.sys_session.query(Table).filter_by(
                name=table_name, database_guid=self.database.guid).all()
        except Exception:
            self.sys_session: Session = PGUtil.get_db_session(configure.SQLALCHEMY_DATABASE_URI)
            history_table = self.sys_session.query(Table).filter_by(
                name=table_name, database_guid=self.database.guid).all()
        for ht in history_table:
            self.sys_session.delete(ht)
        self.sys_session.add(table)

        feature_defn: FeatureDefn = layer.GetLayerDefn()
        for i in range(feature_defn.GetFieldCount()):
            field_defn: FieldDefn = feature_defn.GetFieldDefn(i)
            field_name = field_defn.GetName()
            # Fall back to the field name when no alternative name is set
            field_alias = field_defn.GetAlternativeName() or field_name
            column = Columns(guid=str(uuid.uuid1()), table_guid=table_guid,
                             name=field_name, alias=field_alias, create_time=this_time, update_time=this_time)
            self.sys_session.add(column)
        return table_guid

    def register_table_vacuate(self, table_guid, vacuate_layers: dict):
        """Add one ``TableVacuate`` row per thinned layer.

        Level and pixel distance are parsed from the layer name, whose last
        two ``_``-separated parts are the level and the grid size; a grid part
        starting with ``0`` is read as the fraction ``0.<part>``.

        :param table_guid: guid of the parent table
        :param vacuate_layers: mapping whose values are the thinned layers
        """
        for layer in vacuate_layers.values():
            name_parts = layer.GetName().split("_")
            pixel_distance_str: str = name_parts[-1]
            lev = name_parts[-2]
            if pixel_distance_str.startswith("0"):
                pixel_distance_str = "0.{}".format(pixel_distance_str)
            table_vacuate = TableVacuate(guid=str(uuid.uuid1()),
                                         table_guid=table_guid,
                                         level=int(lev),
                                         name=layer.GetName(),
                                         pixel_distance=float(pixel_distance_str))
            self.sys_session.add(table_vacuate)

    def commit(self):
        """Commit the metadata session, the datasource transaction and the log session."""
        if self.sys_session is not None:
            self.sys_session.commit()
        # Identity test: an OGR handle should not be judged by truthiness
        if self.pg_ds is not None:
            self.pg_ds.CommitTransaction()
        if self.process_session is not None:
            self.process_session.commit()

    def end(self):
        """Release both sessions and the datasource.

        Every resource is attempted even if releasing an earlier one raises;
        the error still propagates afterwards.
        """
        try:
            if self.sys_session is not None:
                self.sys_session.close()
        finally:
            try:
                if self.pg_ds is not None:
                    self.pg_ds.Destroy()
            finally:
                if self.process_session is not None:
                    self.process_session.close()

    def rollback(self):
        """Roll back the metadata session and the datasource transaction."""
        if self.sys_session is not None:
            self.sys_session.rollback()
        if self.pg_ds is not None:
            self.pg_ds.RollbackTransaction()
class VacuateProcess:
    """Builds thinned ("vacuate") copies of one layer on a set of grids.

    For every selected grid size a separate datasource and layer are created.
    ``vacuate()`` is then called once per imported geometry and stores at
    most a few representatives per grid cell in each thinned layer.
    """

    # Class-level defaults are kept for backward compatibility only.
    # __init__ gives every instance its own containers, otherwise the dicts
    # and lists below would be shared (and keep growing) across layers.
    max_level = 0
    fill_dict = {}
    vacuate_layers = {}
    vacuate_layers_gridsize = {}
    pg_ds_dict = {}
    # A layer needs more features than this before it is thinned
    # (the original note mentions 50,000)
    least_vacuate_count = VacuateConf.least_vacuate_count
    extent = []
    is_spatial = False
    lonlat_gridsize = VacuateConf.lonlat_gridsize
    project_gridsize = VacuateConf.project_gridsize
    # Candidate grid sizes for this layer
    t_grid_size = []
    # Grid sizes actually used by this process, one per level
    this_gridsize = []

    def __init__(self, layer: Layer, table_guid, options, sqlalchemy_uri=None):
        """Decide the thinning levels for ``layer`` and create their layers.

        :param layer: source layer; its spatial filter and read cursor are
            reset by the density check
        :param table_guid: guid used in the names of the thinned layers
        :param options: layer creation options of the main layer; the first
            entry is dropped and ``OVERWRITE=yes`` / ``LAUNDER=no`` are added
        :param sqlalchemy_uri: stored connection string of the target
            database, only needed when ``configure.VACUATE_DB_URI`` is not set
        :raises Exception: when a level must be created but no connection
            string is available
        """
        # Per-instance state; never mutate the class-level containers
        self.max_level = 0
        self.fill_dict = {}
        self.vacuate_layers = {}
        self.vacuate_layers_gridsize = {}
        self.pg_ds_dict = {}
        self.extent = []
        self.is_spatial = False
        self.t_grid_size = []
        self.this_gridsize = []

        extent = layer.GetExtent()
        # Only spatial layers are initialised: a min-x of exactly 0 is
        # treated as "no extent"
        if not (extent[0] > 0 or extent[0] < 0):
            return
        self.is_spatial = True
        self.extent = extent

        # Number of levels depends on feature count, extent and density
        lc = layer.GetFeatureCount()
        fill_precent = self._count_filled_cells(layer, extent)

        # Base density factor
        original_density = 8
        # A min-x above 180 is taken as projected coordinates
        if extent[0] > 180:
            self.t_grid_size = self.project_gridsize
        else:
            self.t_grid_size = self.lonlat_gridsize

        area = (extent[1] - extent[0]) * (extent[3] - extent[2])
        # fill_precent == 0 means no cell held a feature: nothing to thin,
        # and it would otherwise divide by zero below
        if lc > self.least_vacuate_count and fill_precent > 0:
            # Feature count weighted by how sparsely the extent is covered
            weighted_count = lc * original_density * (100.0 / fill_precent)
            for grid_size in self.t_grid_size:
                cell_count = area / (grid_size ** 2)
                # Enough cells, and more (weighted) features than cells
                if cell_count > self.least_vacuate_count and weighted_count > cell_count:
                    print(grid_size)
                    self.this_gridsize.append(grid_size)
                    self.max_level += 1

        try:
            self._open_level_datasources(sqlalchemy_uri)
            self._create_level_layers(layer, table_guid, options)
        except Exception:
            # The caller never receives this instance when the constructor
            # fails, so release the connections opened so far here
            self.end()
            raise

    @staticmethod
    def _count_filled_cells(layer, extent):
        """Return how many cells of a 10x10 grid over ``extent`` hold a feature."""
        p_x = (extent[1] - extent[0]) / 10.0
        p_y = (extent[3] - extent[2]) / 10.0
        fill_precent = 0
        StructurePrint().print("判断疏密")
        for ix in range(10):
            for iy in range(10):
                grid_extent = [extent[0] + ix * p_x, extent[0] + ix * p_x + p_x,
                               extent[2] + iy * p_y, extent[2] + iy * p_y + p_y]
                poly = GeometryAdapter.envelop_2_polygon(grid_extent)
                layer.SetSpatialFilter(None)
                layer.SetSpatialFilter(poly)
                layer.ResetReading()
                if layer.GetNextFeature():
                    fill_precent += 1
        print(fill_precent)
        StructurePrint().print("判断疏密结束")
        # Leave the layer unfiltered and rewound for the caller
        layer.SetSpatialFilter(None)
        layer.ResetReading()
        return fill_precent

    def _open_level_datasources(self, sqlalchemy_uri):
        """Open one datasource per level and start a transaction on each."""
        for level in range(self.max_level):
            if configure.VACUATE_DB_URI:
                pg_ds_l: DataSource = PGUtil.open_pg_data_source(1, configure.VACUATE_DB_URI)
            else:
                if sqlalchemy_uri is None:
                    raise Exception("缺少抽稀数据库连接!")
                pg_ds_l: DataSource = PGUtil.open_pg_data_source(1, DES.decode(sqlalchemy_uri))
            # Register first so end() can release it if the next call fails
            self.pg_ds_dict[level] = pg_ds_l
            pg_ds_l.StartTransaction()

    def _create_level_layers(self, layer, table_guid, options):
        """Create the thinned layer of every level."""
        # Thinned tables are always overwritten; the copy keeps the caller's
        # option list untouched
        layer_options = list(options[1:])
        layer_options.append("OVERWRITE=yes")
        layer_options.append("LAUNDER=no")

        schema = layer.schema
        # Extra bookkeeping fields: features seen per cell and the cell name
        schema.append(ogr.FieldDefn("_dcigrid_count_", ogr.OFTInteger))
        schema.append(ogr.FieldDefn("_dcigrid_name_", ogr.OFTString))

        for level in range(self.max_level):
            this_grid_len = self.this_gridsize[level]
            self.vacuate_layers_gridsize[level] = this_grid_len
            pg = self.pg_ds_dict[level]

            # Grid part of the table name: the digits after the decimal point
            # for sizes below 1
            grid_name = str(this_grid_len)
            if this_grid_len < 1:
                grid_name = str(this_grid_len).split(".")[-1]
            # str(0.00008) is in exponent notation, so it is spelled out.
            # "==" is used because int.__eq__(float) returns the truthy
            # NotImplemented.
            if this_grid_len == 0.00008:
                grid_name = "00008"

            # Thinned layers mix points and original geometries, hence
            # wkbUnknown.  The table name follows a fixed naming rule.
            grid_index = self.t_grid_size.index(this_grid_len)
            print("{}:{}".format(grid_index, this_grid_len))
            v_ln = "z{}_vacuate_{}_{}".format(table_guid, grid_index, grid_name)
            vl = pg.CreateLayer(v_ln, layer.GetSpatialRef(), ogr.wkbUnknown, layer_options)
            # Thinned tables carry the attributes as well
            vl.CreateFields(schema)
            self.vacuate_layers[level] = vl

    def vacuate(self, g, feature=None):
        """Offer one geometry to every thinned layer.

        Per level the geometry is assigned to the grid cell of its centroid.
        The first geometry of a cell is stored (as its centroid when it is
        smaller than the cell); later ones are stored only while the cell has
        fewer than 5 entries and the geometry spans more than 10 cells.
        Every call increases the cell counter.

        :param g: geometry to insert
        :param feature: source feature whose attributes are copied; may be
            omitted, in which case only the geometry is stored
        """
        if not self.is_spatial or self.max_level == 0:
            return

        # Independent of the level, so computed once
        center: Geometry = g.Centroid()
        envelope = g.GetEnvelope()
        long_extent = envelope[1] - envelope[0]
        lat_extent = envelope[3] - envelope[2]

        for level in range(self.max_level):
            this_grid_len = self.vacuate_layers_gridsize[level]
            row = int((center.GetY() - self.extent[2]) / this_grid_len)
            col = int((center.GetX() - self.extent[0]) / this_grid_len)
            key = "{}.{}.{}".format(level, row, col)
            seen = self.fill_dict.get(key) or 0

            if seen == 0:
                # Smaller than the cell: its centre point is enough
                fits_in_cell = this_grid_len > long_extent and this_grid_len > lat_extent
                stored_geom = center if fits_in_cell else g
            elif (long_extent > 10 * this_grid_len or lat_extent > 10 * this_grid_len) and seen < 5:
                # Very large geometries get a few more chances
                stored_geom = g
            else:
                stored_geom = None

            if stored_geom is not None:
                self._write_vacuate_feature(level, stored_geom, feature, "{}.{}".format(row, col))
            self.fill_dict[key] = seen + 1

    def _write_vacuate_feature(self, level, geom, feature, cell_name):
        """Insert ``geom`` with the attributes of ``feature`` into one thinned layer."""
        vacuate_layer: Layer = self.vacuate_layers.get(level)
        feat = ogr.Feature(vacuate_layer.GetLayerDefn())
        feat.SetGeometry(geom)
        # Copy the attributes of the source feature
        if feature is not None:
            for field_name, value in feature.items().items():
                feat.SetField(field_name, value)
        feat.SetField("_dcigrid_name_", cell_name)
        vacuate_layer.CreateFeature(feat)

    def set_vacuate_count(self):
        """Write the per-cell counters into ``_dcigrid_count_`` of every stored feature."""
        if self.is_spatial:
            for level in range(self.max_level):
                vacuate_layer: Layer = self.vacuate_layers.get(level)
                for feat in vacuate_layer:
                    key = "{}.{}".format(level, feat.GetField("_dcigrid_name_"))
                    feat.SetField("_dcigrid_count_", self.fill_dict.get(key))
                    vacuate_layer.SetFeature(feat)

    def end(self):
        """Release the datasource of every level.

        NOTE(review): transactions are started in the constructor but no
        CommitTransaction is visible in this class - confirm that destroying
        the datasource persists the thinned layers as intended.
        """
        for pg in self.pg_ds_dict.values():
            pg.Destroy()

    def rollback(self):
        """Roll back the transaction of every level."""
        for pg in self.pg_ds_dict.values():
            pg.RollbackTransaction()