Grounding Spatial Language for Video Search. Tellex, S., Kollar, T., Shaw, G., Roy, N., & Roy, D.
The ability to find a video clip that matches a natural language description of an event would enable intuitive search of large databases of surveillance video. We present a mechanism for connecting a spatial language query to a video clip corresponding to the query. The system can retrieve video clips matching millions of potential queries that describe complex events in video such as "people walking from the hallway door, around the island, to the kitchen sink." By breaking down the query into a sequence of independent structured clauses and modeling the meaning of each component of the structure separately, we are able to improve on previous approaches to video retrieval by finding clips that match much longer and more complex queries using a rich set of spatial relations such as "down" and "past." We present a rigorous analysis of the system's performance, based on a large corpus of task-constrained language collected from fourteen subjects. Using this corpus, we show that the system effectively retrieves clips that match natural language descriptions: 58.3% were ranked in the top two of ten in a retrieval task. Furthermore, we show that spatial relations play an important role in the system's performance.
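To illustrate the decomposition the abstract describes, here is a minimal Python sketch, not the paper's implementation: a query is represented as a sequence of (figure, relation, landmark) clauses, each spatial relation is scored independently against a clip's person trajectory, and clips are ranked by the combined score. All class names, function names, and the toy distance-based scoring are assumptions made for illustration only.

```python
from dataclasses import dataclass
from typing import Callable, Dict, List, Tuple

# A structured spatial clause: a figure (the moving person), a spatial
# relation ("to", "past", ...), and a named landmark ("the kitchen sink").
# The clause structure follows the abstract; the rest is illustrative.
@dataclass
class SpatialClause:
    figure: str
    relation: str
    landmark: str

# A clip is assumed to carry a person trajectory as (x, y) points plus a
# map of named landmark locations (reduced here to center points).
@dataclass
class Clip:
    clip_id: str
    trajectory: List[Tuple[float, float]]
    landmarks: Dict[str, Tuple[float, float]]

def score_to(traj, landmark_xy):
    """Toy model of 'to': reward trajectories that end near the landmark."""
    ex, ey = traj[-1]
    dist = ((ex - landmark_xy[0]) ** 2 + (ey - landmark_xy[1]) ** 2) ** 0.5
    return 1.0 / (1.0 + dist)

def score_past(traj, landmark_xy):
    """Toy model of 'past': reward trajectories whose closest approach is
    near the landmark but which keep moving away afterwards."""
    dists = [((x - landmark_xy[0]) ** 2 + (y - landmark_xy[1]) ** 2) ** 0.5
             for x, y in traj]
    closest = min(dists)
    ends_away = dists[-1] > closest + 1.0
    return (1.0 / (1.0 + closest)) * (1.0 if ends_away else 0.1)

RELATION_MODELS: Dict[str, Callable] = {"to": score_to, "past": score_past}

def score_clip(query: List[SpatialClause], clip: Clip) -> float:
    """Combine per-clause scores (here, a simple product) into a clip score."""
    score = 1.0
    for clause in query:
        model = RELATION_MODELS.get(clause.relation)
        landmark_xy = clip.landmarks.get(clause.landmark)
        if model is None or landmark_xy is None:
            return 0.0
        score *= model(clip.trajectory, landmark_xy)
    return score

def rank_clips(query: List[SpatialClause], clips: List[Clip]) -> List[Clip]:
    """Return clips sorted from best to worst match for the query."""
    return sorted(clips, key=lambda c: score_clip(query, c), reverse=True)
```

The point of the sketch is the factorization: because each clause is scored independently, long multi-clause queries compose from the same small set of per-relation models rather than requiring a monolithic model of the whole sentence.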
