Commit b0a3e012 authored by dagal's avatar dagal
Browse files

Merge branch 'madrid_emt'

parents 5943c892 31c777ea
#!/bin/bash
USER=postgres
PW=postgres
DB=trippydb
PGPASSWORD=$PW psql -h localhost -U $USER -d $DB -q -f schema.sql
PGPASSWORD=$PW psql -h localhost -U $USER -d $DB -q -f emt_stops.sql
PGPASSWORD=$PW psql -h localhost -U $USER -d $DB -q -f emt_lines.sql
PGPASSWORD=$PW psql -h localhost -U $USER -d $DB -q -f emt_linestops.sql
PGPASSWORD=$PW psql -h localhost -U $USER -d $DB -q -f emt_journeys.sql
unzip -p emt_stages.zip | PGPASSWORD=$PW psql -h localhost -U $USER -d $DB -q
......@@ -14,7 +14,7 @@ from pyqtree import Index
import utm
seconds_day = 24*60*60
days_cycle = 31
days_cycle = 30
def parse_time(time_str):
time_val = [int(t) for t in time_str.split(":")]
......@@ -30,8 +30,9 @@ class Stop():
# self.routes = set()
self.connections = set()
# self.freq = 0
# self.name = name
(self.x, self.y) = (lat, lng)
self.name = name
(self.lat, self.lng) = (lat, lng)
#(self.x, self.y) = (lat, lng)
def connect(self, stop):
stop.connections.add(self.id)
......@@ -87,7 +88,7 @@ class Network():
self.stops[stop.id] = stop
# self.stops_by_name[stop.name] = stop
(stop.x, stop.y, _, _) = utm.from_latlon(stop.x, stop.y)
(stop.x, stop.y, _, _) = utm.from_latlon(stop.lat, stop.lng)
for s in self.find_near_stops(stop, 100):
s.connect(stop)
......@@ -105,10 +106,33 @@ class Network():
return trip
def compute_trips_by_stop(self):
def compute_trips_by_stop(self, prefix=None):
tripsByStop = [{} for _ in xrange(7)]
lineStops = {}
if prefix:
fls = open(prefix + "_linestops.sql", "w")
for trip in self.trips.values():
stops = [s for (_,_,s) in trip.stops]
if len(set(stops)) < len(stops):
sys.stderr.write("Skipping line %s trip %s\n" % (trip.get_line(), trip.id))
continue
if prefix:
line = trip.get_line()
if line in lineStops:
if len(lineStops[line]) != len(stops):
sys.stderr.write(line + ": " + lineStops[line] + " vs " + stops + "\n")
else:
lineStops[line] = stops
for i,s in enumerate(stops):
mystr = "INSERT INTO line_stop (line_id, stop_id, seq) VALUES ('%s', '%s', %d);\n" % (line, s, i+1)
fls.write(mystr.encode("utf-8"))
for day in [d[0] for d in enumerate(trip.days) if d[1]]:
for _,end_time,stop in trip.stops:
if stop in tripsByStop[day]:
......@@ -116,15 +140,21 @@ class Network():
else:
tripsByStop[day][stop] = [(end_time, trip.id)]
if prefix:
fls.close()
for day in tripsByStop:
for trips in day.values():
trips.sort()
return tripsByStop
def calculate_trips_by_day(self, stop_dict, cycle):
def calculate_trips_by_day(self, stop_dict, cycle, prefix=None):
tripsByDay = [{} for _ in xrange(cycle)]
tripCounter = {}
if prefix:
fj = open(prefix + "_journeys.sql", "w")
for day in xrange(cycle):
#print "DAY " + str(day)
......@@ -133,22 +163,28 @@ class Network():
key=lambda t: (t.route, t.direction, t.start_time, t.end_time)):
line = trip.get_line()
c = 1
if (line, trip.start_time) in tripsByDay[day]:
continue
if line in tripCounter:
c = tripCounter[line]
tripCounter[line] = c+1
tripCounter[line] += 1
else:
tripCounter[line] = 1
tripsByDay[day][(line, trip.start_time)] = c
tripsByDay[day][(line, trip.start_time)] = tripCounter[line]
if prefix:
mystr = "INSERT INTO journey (line_id, journey_id, start_time) VALUES ('%s', %d, to_timestamp(%d) at time zone 'UTC');\n" % (line, tripCounter[line], day*seconds_day + trip.start_time)
fj.write(mystr.encode("utf-8"))
#print trip.get_line() + ": "
#print ",".join(["%s-%s" % (encode_time(s[0]), stop_dict[s[2]]) for s in trip.stops])
if prefix:
fj.close()
return tripsByDay
class TDay():
......@@ -191,15 +227,19 @@ class TTime():
#return self.strftime("%H:%M")
return str(self.val())
def parse_gtfs(file_in, file_out, file_freqs = None, network = Network()):
def parse_gtfs(file_in, file_out, file_freqs = None, network = Network(), prefix = None):
loader = transitfeed.Loader(file_in, problems = transitfeed.problems.ProblemReporter())
sched = loader.Load()
for r in sched.GetRouteList():
mystr = "INSERT INTO line (id, short_name, long_name) VALUES ('%sd0', '%s', '%s');" % (r.route_id, r.route_short_name, r.route_long_name)
print(mystr.encode("utf-8"))
mystr = "INSERT INTO line (id, short_name, long_name) VALUES ('%sd1', '%s(R)', '%s (vuelta)';" % (r.route_id, r.route_short_name, r.route_long_name)
print(mystr.encode("utf-8"))
if prefix:
flines = open(prefix + "_lines.sql", "w")
fstops = open(prefix + "_stops.sql", "w")
for r in sched.GetRouteList():
mystr = "INSERT INTO line (id, short_name, long_name) VALUES ('%sd0', '%s', '%s');\n" % (r.route_id, r.route_short_name, r.route_long_name)
flines.write(mystr.encode("utf-8"))
mystr = "INSERT INTO line (id, short_name, long_name) VALUES ('%sd1', '%s(R)', '%s (vuelta)');\n" % (r.route_id, r.route_short_name, r.route_long_name)
flines.write(mystr.encode("utf-8"))
for t in sched.GetTripList():
days = sched.GetServicePeriod(t.service_id).day_of_week
......@@ -207,11 +247,21 @@ def parse_gtfs(file_in, file_out, file_freqs = None, network = Network()):
for stop_time in t.GetStopTimes():
s = sched.GetStop(stop_time.stop_id)
stop = network.add_stop(Stop(s.stop_id, lat=s.stop_lat, lng=s.stop_lon))
stop = network.add_stop(Stop(s.stop_id, name=s.stop_name, lat=s.stop_lat, lng=s.stop_lon))
trip.add_stop(parse_time(stop_time.arrival_time), parse_time(stop_time.departure_time), stop)
network.add_trip(trip)
if prefix:
for i,stop_id in enumerate(sorted(network.stops.keys())):
s = network.stops[stop_id]
mystr = "INSERT INTO stop (id, ctr_id, name, lat, lon) VALUES ('%s', %d, '%s', %f, %f);\n" % (s.id, i+1, s.name, s.lat, s.lng)
fstops.write(mystr.encode("utf-8"))
#s.name = None
flines.close()
fstops.close()
if file_freqs:
file = open(file_freqs)
for line in file:
......@@ -307,22 +357,40 @@ def load_stops(path, nStops=20000):
return stops
def is_better_stop(candidate_stop, next_stops):
same_stops = [s for s in next_stops if s[2] == candidate_stop[2]]
if len(same_stops) == 0:
# stop not in the next ones, should keep
return True
else:
# stop will arrive sooner this way, should keep
return candidate_stop[0] < same_stops[0][0]
def main(argv):
prefix = None
#prefix = "emt"
n_traj = 0
#n_traj = 1000
#n_traj = 10
#n_traj = 100000
#n_traj = 10000000
#change_probs = [0.50, 0.90, 0.95, 0.98, 1.0]
change_probs = [0.98, 0.98, 0.99, 1.0]
#change_probs = [0.98, 0.98, 0.99, 1.0]
switch_weights = [10, 10, 5, 1]
changes = collections.Counter()
lengths = collections.Counter()
#print("BEGIN TRANSACTION;")
#network = parse_gtfs("madrid_emt.zip", None)
#network = parse_gtfs("madrid_emt.zip", "madrid_emt.dat", prefix=prefix)
#network = parse_gtfs("madrid_crtm.zip", "madrid_bus.dat", network=network)
#print("COMMIT;")
#sys.exit(0)
network = load_gtfs("madrid_bus.dat")
network = load_gtfs("madrid_emt.dat")
#tripsByStop = network.compute_trips_by_stop(prefix=prefix)
tripsByStop = network.compute_trips_by_stop()
#sys.exit(0)
valid_stops = None
......@@ -341,135 +409,125 @@ def main(argv):
#print len(network.lines.keys())
#network.assign_freqs()
stops_dict = {key: value+1 for value, key in enumerate(sorted(network.stops.keys()))}
#tripsByDay = network.calculate_trips_by_day(stops_dict, days_cycle, prefix=prefix)
tripsByDay = network.calculate_trips_by_day(stops_dict, days_cycle)
#sys.exit(0)
unused_stops = set(network.stops)
i = 0
max_waiting_time = 30*60
err = 0
skip_bad = 0
n_switch = [x*n_traj/sum(switch_weights) for x in switch_weights]
n_switch[0] += n_traj - sum(n_switch)
if prefix:
ft = open(prefix + "_stages.sql", "w")
while i < n_traj:
current_day = getRandomDay()
origin = network.stops[random.choice(tripsByStop[current_day.val() % 7].keys())]
(t, trip_id) = random.choice(tripsByStop[current_day.val() % 7][origin.id])
current_trip = network.trips[trip_id]
next_stops = [s for s in reversed(current_trip.stops) if s[1] > t and s[2] != origin]
current_line = current_trip.get_line()
current_time = TTime(current_day, 0) + t
next_stops = [s for s in current_trip.stops if s[1] > t and s[2] != origin.id]
if len(next_stops) == 0:
skip_bad += 1
continue
complete_trajectory = [origin.id]
trajectory = [origin.id]
current_line = current_trip.get_line()
cur_changes = 0
current_time = TTime(current_day, 0) + t
times = [tripsByDay[current_day.val()][(current_line, current_trip.start_time)]]
prob_next = 0.0
lines = [current_line]
(t,_,next) = next_stops.pop()
complete_trajectory = [origin.id, next]
try:
while random.random() > prob_next:
prob_next += 0.01
should_change = len(next_stops) == 0 or random.random() > change_probs[cur_changes]
if should_change:
if cur_changes+1 == len(change_probs):
break
prev = next
prev_t = t
connections = [s for s in network.stops[next].connections if s not in complete_trajectory]
trips = [current_trip]
times = [tripsByDay[current_day.val()][(current_line, current_trip.start_time)]]
switch_stops = [s for s in next_stops if len(network.stops[s[2]].connections) > 0 and s[2] not in complete_trajectory]
switch_goal = 0
cur_switch = 0
for switch,n in reversed([x for x in enumerate(n_switch)]):
if n > 0:
switch_goal = switch
break
while cur_switch < switch_goal and len(switch_stops) > 0:
ta,tz,next = random.choice(switch_stops)
try:
next_time = TTime(current_time.day,0) + ta
next_day = next_time.day if next in tripsByStop[next_time.day.val() % 7] else current_time.day
possible_trips = [(ttrip[0], network.trips[ttrip[1]]) for c in network.stops[next].connections \
for ttrip in (tripsByStop[next_day.val() % 7][c] if c in tripsByStop[next_day.val() % 7] else []) \
if ta <= ttrip[0] <= ta + max_waiting_time and c not in complete_trajectory]
possible_trips = [(t,trip) for (t,trip) in possible_trips \
if trip.route not in [tr.route for tr in trips] \
and len([s for s in trip.stops if s[1] > t and s[2] not in complete_trajectory and is_better_stop(s,next_stops)]) > 1]
if len(possible_trips) > 0:
complete_trajectory += [s[2] for s in next_stops if s[0] <= tz]
trajectory.append(next)
trips.append(current_trip)
lines.append(current_line)
times.append(tripsByDay[current_day.val()][(current_line, current_trip.start_time)])
current_day = next_day
(t, current_trip) = random.choice(possible_trips)
current_line = current_trip.get_line()
current_time = TTime(current_day, 0) + t
while len(connections) > 0:
try:
next = random.choice(connections)
next_day = current_day if next in tripsByStop[current_day.val() % 7] else current_time.day
possible_trips = [(ttrip[0], network.trips[ttrip[1]]) for ttrip in \
tripsByStop[next_day.val() % 7][next] \
if t <= ttrip[0] <= t + max_waiting_time \
and network.trips[ttrip[1]].route != current_trip.route\
and len([s for s in network.trips[ttrip[1]].stops if s[1] > ttrip[0]]) > 1]
if len(possible_trips) > 0:
current_day = next_day
break
else:
connections.remove(next)
except (IndexError,KeyError):
connections.remove(next)
if len(connections) == 0:
next = prev
t = prev_t
prob_next = 1
else:
if valid_stops:
if current_line not in valid_stops[stops_dict[prev]]:
print("DAMN")
print((stops_dict[prev], current_line))
skip_bad += 1
break
(t, current_trip) = random.choice(possible_trips)
next_stops = [s for s in reversed(current_trip.stops) if s[1] > t]
lines.append(current_line)
trajectory.append(prev)
times.append(times[-1])
current_line = current_trip.get_line()
current_time = TTime(current_day, 0) + t
cur_changes += 1
if current_line not in valid_stops[stops_dict[next]]:
print("DAMN")
print((stops_dict[next], current_line))
skip_bad += 1
break
lines.append(current_line)
trajectory.append(next)
times.append(tripsByDay[current_day.val()][(current_line, current_trip.start_time)])
complete_trajectory.append(next)
else:
(t,_,next) = next_stops.pop()
next_stops = [s for s in current_trip.stops if s[1] >= t and s[2] not in complete_trajectory]
(_,t,next) = next_stops.pop(0)
complete_trajectory.append(next)
trajectory.append(next)
trips.append(current_trip)
lines.append(current_line)
times.append(tripsByDay[current_day.val()][(current_line, current_trip.start_time)])
switch_stops = [s for s in next_stops if len(network.stops[s[2]].connections) > 0 and s[2] not in complete_trajectory]
cur_switch += 1
else:
switch_stops.remove((ta,tz,next))
except (IndexError,KeyError):
err += 1
switch_stops.remove((ta,tz,next))
if len(next_stops) > 0:
(t,_,next) = next_stops.pop()
complete_trajectory.append(next)
except IndexError:
err += 1
if cur_switch < switch_goal and n_switch[cur_switch] <= 0:
skip_bad += 1
continue
if (trajectory[-1] != next):
trajectory.append(next)
times.append(tripsByDay[current_day.val()][(current_line, current_trip.start_time)])
lines.append(current_line)
if len(trajectory) % 2 == 1 or any([]):
# Just discard this piece of shit
err += 1
continue
# complete last stage
(t,_,next) = random.choice(next_stops)
trajectory.append(next)
trips.append(current_trip)
lines.append(current_line)
times.append(tripsByDay[current_day.val()][(current_line, current_trip.start_time)])
n_switch[cur_switch] -= 1
changes.update([cur_switch])
lengths.update([len(trajectory)])
unused_stops.difference_update(trajectory)
trajectory[:] = map(lambda (l,s,t): "%s:%s:%s" % (l,str(stops_dict[s]),str(t)), zip(lines, trajectory, times))
# loops = min(int(random.expovariate(0.5)) + 1, n_traj-i)
loops = 1
if prefix:
for j in xrange(len(trajectory)/2):
ft.write("INSERT INTO stage (trip_id, num_stage, line_id, journey_id, board_stop, alight_stop) ")
ft.write("VALUES (%d, %d, '%s', %d, '%s', '%s');\n" % (i, j, lines[j*2], times[j*2], trajectory[j*2], trajectory[j*2+1]))
sys.stdout.write(",".join(map(lambda (l,s,t): "%s:%s:%s" % (l,str(stops_dict[s]),str(t)), zip(lines, trajectory, times))))
sys.stdout.write("\n")
i+=1
for _ in xrange(loops):
changes.update([cur_changes])
lengths.update([len(trajectory)])
sys.stdout.write(",".join(trajectory))
sys.stdout.write("\n")
i += 1
if prefix:
ft.close()
if unused_stops:
sys.stderr.write("\nWARNING: " + str(len(unused_stops)) + " Unused stops\n")
sys.stderr.write("\nSKIPPED: " + str(skip_bad) + "\n")
sys.stderr.write("\nSKIPPED: " + str(skip_bad) + "\n")
......@@ -478,6 +536,8 @@ def main(argv):
sys.stderr.write("\nLENGTHS: " + str(lengths) + "\n")
sys.stderr.write("\nCHANGES: " + str(changes) + "\n")
for k,v in changes.iteritems():
sys.stderr.write(str(k) + " " + str(float(v)/n_traj) + "\n")
......
DROP TABLE IF EXISTS stop CASCADE;
CREATE TABLE stop
(
id varchar primary key,
ctr_id integer unique,
name varchar,
lat float,
lon float
);
DROP TABLE IF EXISTS line CASCADE;
CREATE TABLE line
(
id varchar primary key,
short_name varchar,
long_name varchar
);
DROP TABLE IF EXISTS line_stop CASCADE;
CREATE TABLE line_stop
(
line_id varchar references line (id) on delete cascade,
stop_id varchar references stop (id) on delete cascade,
seq integer,
primary key (line_id, stop_id)
);
DROP TABLE IF EXISTS journey CASCADE;
CREATE TABLE journey
(
line_id varchar references line (id) on delete cascade,
journey_id integer,
start_time timestamp without time zone,
primary key (line_id, journey_id)
);
--skipping stop_time
DROP TABLE IF EXISTS stage CASCADE;
CREATE TABLE stage
(
trip_id integer,
num_stage integer,
line_id varchar,
journey_id integer,
board_stop varchar,
alight_stop varchar,
primary key (trip_id, num_stage),
foreign key (line_id, journey_id) references journey on delete cascade,
foreign key (line_id, board_stop) references line_stop on delete cascade,
foreign key (line_id, alight_stop) references line_stop on delete cascade
);
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment