The benefits and challenges of sharing glidein factory operations across nine time zones between OSG and CMS. Sfiligoi, I., Dost, J. M., Zvada, M., Butenas, I., Holzman, B., Wuerthwein, F., Kreuzer, P., Teige, S. W., Quick, R., Hernández, J. M., & Flix, J. Journal of Physics: Conference Series, 2012.
The benefits and challenges of sharing glidein factory operations across nine time zones between OSG and CMS [link]Website  doi  abstract   bibtex   
OSG has been operating for a few years at UCSD a glideinWMS factory for several scientific communities, including CMS analysis, HCC and GLOW. This setup worked fine, but it had become a single point of failure. OSG thus recently added another instance at Indiana University, serving the same user communities. Similarly, CMS has been operating a glidein factory dedicated to reprocessing activities at Fermilab, with similar results. Recently, CMS decided to host another glidein factory at CERN, to increase the availability of the system, both for analysis, MC and reprocessing jobs. Given the large overlap between this new factory and the three factories in the US, and given that CMS represents a significant fraction of glideins going through the OSG factories, CMS and OSG formed a common operations team that operates all of the above factories. The reasoning behind this arrangement is that most operational issues stem from Grid-related problems, and are very similar for all the factory instances. Solving a problem in one instance thus very often solves the problem for all of them. This paper presents the operational experience of how we address both the social and technical issues of running multiple instances of a glideinWMS factory with operations staff spanning multiple time zones on two continents. © Published under licence by IOP Publishing Ltd.
% CHEP 2012 proceedings paper published in Journal of Physics: Conference Series, volume 396.
% The entry key mirrors the citation_key field. The pages field holds the article
% number taken from the DOI suffix (presumably the JPCS article ID; confirm against
% the publisher page). Fields such as websites, city, id, created, profile_id, etc.
% are reference-manager metadata that standard styles ignore.
@article{Sfiligoi2012,
 title = {The benefits and challenges of sharing glidein factory operations across nine time zones between {OSG} and {CMS}},
 type = {article},
 year = {2012},
 keywords = {Common operations,Fermilab,Glideins,Indiana Uni,Nuclear physics,Problem solving},
 volume = {396},
 websites = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-84873292528&doi=10.1088%2F1742-6596%2F396%2F3%2F032103&partnerID=40&md5=a7f3ed89f9bced18803b2247bbc554fb},
 city = {New York, NY},
 id = {02cd9036-9244-364f-8bd6-80999d001122},
 created = {2019-10-01T17:20:42.303Z},
 file_attached = {false},
 profile_id = {42d295c0-0737-38d6-8b43-508cab6ea85d},
 last_modified = {2019-10-01T17:25:14.131Z},
 read = {false},
 starred = {false},
 authored = {true},
 confirmed = {true},
 hidden = {false},
 citation_key = {Sfiligoi2012},
 source_type = {article},
 notes = {cited By 0; Conference of International Conference on Computing in High Energy and Nuclear Physics 2012, CHEP 2012 ; Conference Date: 21 May 2012 Through 25 May 2012; Conference Code:95155},
 folder_uuids = {ec6ad3c6-db7d-494d-863c-ef38d23f1f7e,22c3b665-9e84-4884-8172-710aa9082eaf},
 private_publication = {false},
 abstract = {OSG has been operating for a few years at UCSD a glideinWMS factory for several scientific communities, including CMS analysis, HCC and GLOW. This setup worked fine, but it had become a single point of failure. OSG thus recently added another instance at Indiana University, serving the same user communities. Similarly, CMS has been operating a glidein factory dedicated to reprocessing activities at Fermilab, with similar results. Recently, CMS decided to host another glidein factory at CERN, to increase the availability of the system, both for analysis, MC and reprocessing jobs. Given the large overlap between this new factory and the three factories in the US, and given that CMS represents a significant fraction of glideins going through the OSG factories, CMS and OSG formed a common operations team that operates all of the above factories. The reasoning behind this arrangement is that most operational issues stem from Grid-related problems, and are very similar for all the factory instances. Solving a problem in one instance thus very often solves the problem for all of them. This paper presents the operational experience of how we address both the social and technical issues of running multiple instances of a glideinWMS factory with operations staff spanning multiple time zones on two continents. © Published under licence by IOP Publishing Ltd.},
 bibtype = {article},
 author = {Sfiligoi, I and Dost, J M and Zvada, M and Butenas, I and Holzman, B and Wuerthwein, F and Kreuzer, P and Teige, S W and Quick, R and Hern{\'a}ndez, J M and Flix, J},
 doi = {10.1088/1742-6596/396/3/032103},
 journal = {Journal of Physics: Conference Series},
 number = {3},
 pages = {032103}
}

Downloads: 0