mirror of
https://github.com/medialab-prado/poblados-colonizacion-colonias-penitenciarias.git
synced 2025-01-15 05:30:57 +01:00
109 lines
4 KiB
Python
109 lines
4 KiB
Python
|
import urllib2
|
||
|
import sys, csv
|
||
|
from bs4 import BeautifulSoup
|
||
|
import string
|
||
|
|
||
|
# Characters kept when the dam name read from the input CSV is copied to the
# output; any character outside string.printable is dropped.
printable = set(string.printable)

# Open the input CSV for reading.  Python 2's csv module expects files opened
# in binary mode so that it can handle line endings itself.
f = open('embalses.csv', 'rb')
reader = csv.reader(f)

# Open the output CSV that the scraped records are written to (binary mode
# for the same reason as above).
f2 = open('embalses-snczi.csv', 'wb')
writer = csv.writer(f2)

# Header row.  Every data row carries 13 fields: the 12 scraped from the dam
# record page plus, last, the name taken from the input CSV, so the header
# needs a 13th column to stay aligned with the data.
names = [
    "codpresa", "Nombre", "Otro nombre", "Fecha", "Anyo", "Rio", "Municipio",
    "Cuenca", "Provincia", "x", "y", "Tipos", "Nombre original",
]
writer.writerow([unicode(s).encode("utf-8") for s in names])

# Base URL for the 1x1-pixel WMS GetFeatureInfo query.  The bounding box
# (xmin,ymin,xmax,ymax) is appended to ``params`` for every request.
baseurl = "http://wms.mapama.es/sig/Agua/Presas/wms.aspx"
params = "?version=1.1.1&request=GetFeatureInfo&layers=Presas&styles=default&SRS=EPSG:32630&width=1&height=1&info_format=text/plain&X=0&Y=0&query_layers=Presas&startIndex=0&bbox="
|
||
|
|
||
|
# The search box around a dam grows by SEARCH_STEP on each side (in the units
# of the request SRS) until the WMS returns a feature or the half-width reaches
# MAX_SEARCH_RADIUS.  The cap prevents an endless stream of requests when no
# dam exists near the given coordinates.
SEARCH_STEP = 100
MAX_SEARCH_RADIUS = 10000

# A GetFeatureInfo body shorter than this is treated as "no feature found".
MIN_RESPONSE_LENGTH = 10

# Position of the dam code within the semicolon-separated WMS response.
CODE_FIELD = 12

# Record page of a dam; the dam code is appended to this URL.
FICHA_URL = "http://sig.mapama.es/93/ClienteWS/snczi/default.aspx?nombre=PRESA&claves=DGAGUA.PRESAS.CODPRESA&valores="

# Value written for every field that could not be read from the record page.
PLACEHOLDER = "---"

# (text searched for in the row label, output column) for the fields whose
# cell text is copied verbatim.
SIMPLE_FIELDS = (
    ("Nombre de la presa", 1),
    ("Otro Nombre", 2),
    ("Fecha de finaliza", 3),
    ("en el que se encuentra la presa", 5),
    ("Municipio", 6),
    ("Cuenca hidro", 7),
    ("Provincia", 8),
    ("Tipos", 11),
)


def _completion_year(date_text):
    """Return the third dash-separated field of *date_text* as an int.

    Returns None when the date is empty or has fewer than three fields, so
    rows without a completion date are skipped instead of raising IndexError.
    A non-numeric year still raises ValueError.
    """
    parts = date_text.split("-")
    year_text = parts[2] if len(parts) > 2 else ""
    if not year_text:
        return None
    return int(year_text)


def _find_dam_code(x, y):
    """Query the WMS around (x, y) and return the dam code, or None.

    The bounding box is widened step by step until the service answers with
    a body of at least MIN_RESPONSE_LENGTH characters.  None is returned when
    the maximum radius is reached or the answer has no code field.
    """
    d = 0
    while d < MAX_SEARCH_RADIUS:
        d += SEARCH_STEP
        bbox = ",".join(str(v) for v in (x - d, y - d, x + d, y + d))
        txt = urllib2.urlopen(baseurl + params + bbox).read()
        print(len(txt))
        if len(txt) >= MIN_RESPONSE_LENGTH:
            datos = txt.split(";")
            if len(datos) > CODE_FIELD:
                return datos[CODE_FIELD]
            return None
    return None


def _read_ficha(soup, codigo):
    """Build the 13-field output record from a parsed dam record page.

    Field 0 is *codigo*; fields 1-11 are filled from the table rows whose
    label contains the matching text; field 12 is left for the caller.
    Fields that are missing or malformed on the page keep PLACEHOLDER.
    """
    data = [PLACEHOLDER] * 13
    data[0] = codigo

    table = soup.find("table")
    # NOTE(review): only rows with class "alt" are read, exactly as before --
    # confirm that every field row of the page carries that class.
    trs = table.findAll("tr", attrs={"class": "alt"}) if table is not None else []
    for tr in trs:
        th = tr.find("th")
        td = tr.find("td")
        if th is None or td is None:
            continue
        label = th.text.strip()
        value = td.text.strip()

        for marker, column in SIMPLE_FIELDS:
            if marker in label:
                data[column] = value

        if "Fecha de finaliza" in label:
            # The year is the third dash-separated field of the date.
            dmy = value.split("-")
            if len(dmy) > 2:
                data[4] = dmy[2]

        if "Coordenadas" in label:
            xy = value.split(" - ")
            if len(xy) > 1:
                data[9] = xy[0]
                data[10] = xy[1]
    return data


try:
    for rownum, row in enumerate(reader):
        if rownum == 0:
            # The first row of the input CSV is the header.
            continue

        nombre = row[1]
        year = _completion_year(row[7])

        # Franco filter: keep only dams finished between 1939 and 1973.
        if year is None or not 1938 < year < 1974:
            continue

        x = int(row[9])
        y = int(row[10])

        codigo = _find_dam_code(x, y)
        if codigo is None:
            print("error")
            writer.writerow([x, y])
            continue

        # Download the dam's record page.
        html = urllib2.urlopen(FICHA_URL + codigo).read()
        soup = BeautifulSoup(html, 'lxml')
        h2 = soup.find("h2")
        if not h2:
            # No record page for this code: log the coordinates only.
            print("error")
            writer.writerow([x, y])
            continue
        print(h2)

        data = _read_ficha(soup, codigo)
        # Keep only printable ASCII so the name can be converted with
        # unicode() when the row is written.
        nombre = "".join(ch for ch in nombre if ch in printable)
        data[12] = nombre

        # Only dams whose listed types include irrigation are written out.
        if "Riego" in data[11]:
            print("DATOS")
            print(nombre)
            print(data[1])
            writer.writerow([unicode(s).encode("utf-8") for s in data])
finally:
    # Close both files even when a request fails, so rows already written
    # are flushed to disk.
    f.close()
    f2.close()
|