geocoding-viviendas-turisticas-comunitat-valenciana.php 4.33 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81
<?php 
// Script configuration.
// Input CSV with the original records and output CSV that receives the geocoded copy.
$csv_filename = '../data/original/190302_viviendas-turisticas-comunidad-valenciana_valencia.csv';
$out_filename = '../data/output/190302_viviendas-turisticas-comunidad-valenciana_valencia_geocoded.csv';
$line_length = 4096; // max line length handed to fgetcsv() (increase it if the file has longer lines)
$delimiter = ","; // field delimiter character
$enclosure = '"'; // field enclosure character


// Geocoding through the Nominatim search API (https://nominatim.openstreetmap.org/).

/**
 * Perform one Nominatim search request and return its first hit.
 *
 * Private helper of geocode_it().
 *
 * @param string $url     Complete search URL; it must ask for format=json.
 * @param array  $headers Request headers as "Name: value" strings.
 * @return array|null The first entry of the decoded result list, or null when the
 *                    transfer fails, the body is not a JSON list, or the list is empty.
 */
function _geocode_it_first_hit( $url, $headers ) {
	$ch = curl_init();
	if ( $ch === false )
		return null;

	curl_setopt($ch, CURLOPT_URL, $url);
	curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
	curl_setopt($ch, CURLOPT_HTTPHEADER, $headers);
	// empty string: let curl advertise the encodings it supports and decode the body itself,
	// so json_decode() below never receives a compressed payload
	curl_setopt($ch, CURLOPT_ENCODING, '');

	$curl_response = curl_exec($ch);
	curl_close($ch);

	// curl_exec() yields false when the transfer failed
	if ( !is_string($curl_response) )
		return null;

	// second argument true: decode JSON objects as associative arrays
	$results = json_decode($curl_response, true);

	// json_decode() yields null for a non-JSON body; guard before indexing
	if ( !is_array($results) || !isset($results[0]) || !is_array($results[0]) )
		return null;

	return $results[0];
}

/**
 * Look up a postal address with Nominatim and return the best match.
 *
 * A structured search (country, state, city, street) is tried first. When it
 * returns nothing, a free-text search over country, city, street name and house
 * number is used as a fallback (the state is not part of the fallback query).
 *
 * @param string $country      Country name.
 * @param string $state        State / region name (structured search only).
 * @param string $city         City name.
 * @param string $street_name  Street name without the house number.
 * @param string $house_number House number; may be empty.
 * @return array|null First Nominatim result as an associative array (the caller
 *                    reads its 'lat' and 'lon' entries), or null when neither
 *                    search produced a result.
 */
function geocode_it( $country='',$state='',$city='',$street_name='',$house_number='') {
	// curl with browser-like headers is used instead of file_get_contents for not being blocked by nominatim.
	// No Host header here: curl derives the correct one from the request URL.
	// No Accept-Encoding header either: CURLOPT_ENCODING in the helper takes care of it.
	// NOTE(review): the Nominatim usage policy asks for a User-Agent or Referer that
	// identifies the application - consider replacing the placeholder values below.
	$headers = [
		'X-Apple-Tz: 0',
		'X-Apple-Store-Front: 143444,12',
		'Accept: text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
		'Accept-Language: en-US,en;q=0.5',
		'Cache-Control: no-cache',
		'Content-Type: application/x-www-form-urlencoded; charset=utf-8',
		'Referer: http://www.example.com/index.php', //Your referrer address
		'User-Agent: Mozilla/5.0 (X11; Ubuntu; Linux i686; rv:28.0) Gecko/20100101 Firefox/28.0',
		'X-MicrosoftAjax: Delta=true'
	];

	// first attempt: structured search, one query parameter per address part
	$q = "https://nominatim.openstreetmap.org/search?format=json&country=".urlencode($country)."&city=".urlencode($city)."&state=".urlencode($state)."&street=".urlencode(trim($house_number." ".$street_name))."&limit=1";

	$hit = _geocode_it_first_hit($q, $headers);
	if ( $hit !== null )
		return $hit;

	// the public Nominatim usage policy caps clients at one request per second,
	// so pause before sending the second request for the same address
	sleep(1);

	// second attempt: free-text search over the same address parts
	$q = "https://nominatim.openstreetmap.org/search?format=json&q=".urlencode(trim($country." ".$city." ".$street_name." ".$house_number))."&limit=1";

	return _geocode_it_first_hit($q, $headers);
}


// Read the source CSV, geocode every data row and write the enriched rows to a new CSV.
$fp = fopen($csv_filename,'r');

if ( $fp === FALSE ) {
	// say so explicitly, so the run does not end without any output
	echo 'Could not open '.$csv_filename.' for reading.';
	echo "\r\n";
} else {

	// every entry of $output is one finished CSV line; the header goes first
	$output = array();
	$output[] = '"Signatura","Municipio","Provincia","Address","Teléfono","lat","lon"'.PHP_EOL;
	$line = -1;
	while ( ($fp_csv = fgetcsv($fp,(int) $line_length,$delimiter,$enclosure)) !== FALSE ) { // begin main loop
		$line++;
		// line 0 is the header row of the source file
		if ( $line == 0 )
			continue;

		// fgetcsv() reports a blank line as an array holding a single null field
		if ( $fp_csv === array(null) )
			continue;

		// pad short rows so the column reads below never hit a missing offset
		$fp_csv = array_pad($fp_csv,5,'');

		// debug: uncomment to stop after the first records
		//if ( $line == 15 )
		//	break;

		echo "Geocoding register ".$line."...";
		echo "\r\n";

		$country = 'Spain';
		$state = 'Valencia';
		$city = 'Valencia';

		$address = $fp_csv[3];
		echo "Trying to geocode ".$address."...";
		echo "\r\n";

		// street name: drop the first word and keep everything up to the first comma;
		// an address that does not fit that layout is passed on whole
		$street_name = $address;
		if ( preg_match('/^[a-zA-Z]* ([^,]*), .*/i',$address,$m) )
			$street_name = $m[1];

		// house number: first run of digits after the first comma; empty when the
		// address has no comma (preg_replace would hand back the whole address here)
		$street_number = '';
		if ( preg_match('/^[^,]*,[^0-9]*([0-9]*).*/i',$address,$m) )
			$street_number = $m[1];

		$geo = geocode_it($country,$state,$city,$street_name,$street_number);

		// geocode_it() returns null when nothing was found, so check before indexing
		$lat = ( is_array($geo) && isset($geo['lat']) ) ? $geo['lat'] : '';
		$lon = ( is_array($geo) && isset($geo['lon']) ) ? $geo['lon'] : '';

		if ( $lat == '' || $lon == '' )
			echo 'Geocoding failed for record '.$line.'.';
		else
			echo 'Geocoding succeded for record '.$line.'.';

		echo "\r\n";
		echo "\r\n";

		// quote every field and double any embedded quote so the output stays valid CSV
		$fields = array($fp_csv[0],$fp_csv[1],$fp_csv[2],$fp_csv[3],$fp_csv[4],$lat,$lon);
		foreach ( $fields as $i => $field )
			$fields[$i] = '"'.str_replace('"','""',(string) $field).'"';
		$output[] = implode(',',$fields).PHP_EOL;

		// delay between queries to avoid being blocked
		sleep(3);

	}
	fclose($fp);

	// open the output file and write the collected lines
	$fp = fopen($out_filename,'w');
	if ( $fp === FALSE ) {
		echo 'Could not open '.$out_filename.' for writing.';
		echo "\r\n";
	} else {
		echo 'Writing geodata to '.$out_filename;
		echo "\r\n";
		foreach ( $output as $o ) {
			fwrite($fp,$o);
		}
		fclose($fp);
		echo 'Geocoding ended.';
		echo "\r\n";
	}

}

?>