-
Notifications
You must be signed in to change notification settings - Fork 2
Expand file tree
/
Copy pathmake-local-tabular-DB.R
More file actions
151 lines (97 loc) · 3.1 KB
/
make-local-tabular-DB.R
File metadata and controls
151 lines (97 loc) · 3.1 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
## make-local-tabular-DB.R
## Build a combined SSURGO + STATSGO tabular SQLite database for local use.
## Downloads source archives with soilDB, then assembles them with createSSURGO().
## Notes:
# on 4-1, re-use SSA .zip archives
# requires 290GB
# ~ 7 minutes on 4-1
# time parallel --eta unzip ::: *.zip
#
# rm *.zip
#
# A lot of free space is required for all of SSURGO
# FY24 SSURGO (uncompressed): 217GB
# Latest STATSGO (uncompressed): 1GB
# Final SSURGO + STATSGO tabular database: 14GB (2.1GB gzipped)
## TODO: integrate with SoilWeb refresh cycle / scripts
## TODO: consider adding RSS tabular data
## will fail on a slow connection, WSS / SDA are shutdown most nights

# soilDB: downloadSSURGO() / createSSURGO() / SDA_query()
library(soilDB)
# RSQLite + DBI: direct access to the resulting SQLite file
library(RSQLite)
library(DBI)
## paths
# Two deployment targets; exactly one set of paths should be active at a time.
# NOTE: previously BOTH sets were assigned, so the local-machine paths silently
# overwrote the soilweb paths. The inactive (soilweb) set is kept commented-out
# for reference -- swap the comments to switch targets.

# soilweb (relative to working directory on 4-1)
# .exdir1 <- 'statsgo'
# .exdir2 <- 'ssurgo'
# .dbfile <- 'ssurgo-combined.sqlite'

# local machine
.exdir1 <- 'e:/temp/statsgo'                                            # STATSGO extraction dir
.exdir2 <- 'e:/temp/ssurgo'                                             # SSURGO extraction dir
.dbfile <- 'e:/gis_data/SSURGO-STATSGO-tabular/ssurgo-combined.sqlite'  # output database
## SSAs to iterate over
# Pull the full soil survey area catalog from Soil Data Access.
# Excludes 'US' (the national STATSGO entry, downloaded separately below) and
# 'MXNL001' (presumably a non-CONUS / Mexico entry -- TODO confirm).
q <- "SELECT areasymbol, saverest FROM sacatalog WHERE areasymbol NOT IN ('US', 'MXNL001');"
x <- SDA_query(q)
# interactive sanity check: number of survey areas returned
nrow(x)
## download
# notes:
# * all of SSURGO will never finish at home / Sonora MLRA office
# * must increase curl timeout on a slow connection (STATSGO will fail with defaults)
# * parallel downloads would be faster (1 hour on soilmap 2-1)

## STATSGO
# * gov machine, Sonora MLRA office: 13 minutes
# * soilmap 4-1: 11 minutes

# effectively disable the soilDB download timeout; the STATSGO archive is large.
# NOTE(review): this changes a global option for the rest of the session and is
# not restored -- acceptable in a standalone script, deliberate here.
options('soilDB.timeout' = 1e6)

# download + extract the national STATSGO archive into .exdir1
system.time(
  downloadSSURGO(areasymbols = 'US', exdir = .exdir1, include_template = FALSE, remove_zip = TRUE, extract = TRUE, overwrite = TRUE, db = 'STATSGO')
)

## SSURGO
#
# not really tested, since I already have a copy of these files in /ssa/fy25
# system.time(
#   downloadSSURGO(areasymbols = x$areasymbol, exdir = .exdir2, include_template = FALSE, remove_zip = TRUE, extract = TRUE, overwrite = TRUE, db = 'SSURGO')
# )
## create database
# Two sequential createSSURGO() passes append into the SAME SQLite file:
# STATSGO first, then SSURGO. Order and overwrite = FALSE are deliberate --
# the second pass must add to (not replace) the first.

# fresh start, remove whatever was left from last time
unlink(.dbfile)

# first pass, STATSGO
# * gov machine: ~ 91 seconds
# * soilweb 4-1: ~ 32 seconds
system.time(
  createSSURGO(filename = .dbfile, exdir = .exdir1, include_spatial = FALSE, overwrite = FALSE)
)

# second pass, SSURGO (appends into the existing file)
# * gov machine: (not possible yet)
# * soilweb 4-1: 106 minutes
system.time(
  createSSURGO(filename = .dbfile, exdir = .exdir2, include_spatial = FALSE, overwrite = FALSE)
)
# sqlite file is 65GB at this point

## connect to finish up
db <- dbConnect(RSQLite::SQLite(), .dbfile)

# cleanup: drop the (very large) cointerp table, then VACUUM to reclaim space
# requires > 65GB free space (VACUUM rebuilds the file alongside the original)
# ~ 15 minutes
# IF EXISTS makes this step idempotent: a re-run or partial build where the
# table is already gone no longer hard-fails the script here.
dbExecute(db, 'DROP TABLE IF EXISTS cointerp;')
dbExecute(db, 'VACUUM;')

# sqlite file is 32GB after VACUUM

# check indices created by createSSURGO()
dbGetQuery(db, 'PRAGMA index_list(mapunit);')
dbGetQuery(db, "select type, name, tbl_name, sql
FROM sqlite_master
WHERE type = 'index' AND tbl_name = 'chorizon' ;")

## TODO additional or specialized indexing?
## check
# interactive sanity checks on the finished database

# list tables
dbListTables(db)

# STATSGO record should be present (mukey '658083' -- presumably a known
# STATSGO map unit used as a smoke test; verify against current snapshot)
dbGetQuery(db, "SELECT mukey, muname, mukind FROM mapunit WHERE mukey = '658083' ;")

# SSURGO record should be present (same idea, known SSURGO mukey)
dbGetQuery(db, "SELECT mukey, muname, mukind FROM mapunit WHERE mukey = '2600481' ;")

# simple query against the component table
dbGetQuery(db, 'SELECT cokey, compname, comppct_r, majcompflag FROM component LIMIT 5;')

# be sure to close connection / file
dbDisconnect(db)
## on 4-1, gzip the sqlite file for transfer (done outside of R)

## cleanup
# NOTE(review): rm(list = ls()) is generally an anti-pattern in scripts; it is
# harmless here only because this is the last statement of a standalone run.
rm(list = ls())
gc(reset = TRUE)