본문 바로가기
Project/ThreeMovie(영화리뷰및예약도우미)

크롤링에 batch update(bulk insert)와 coroutine으로 속도 올리기 [Kotlin + spring boot]

by HDobby 2023. 5. 25.

 시간표 업데이트에 8분 넘게 걸려 실시간 남은 좌석 수 확인에 대한 메리트가 없다고 판단했다. 데이터를 insert 하는 데 2분, 시간표 데이터를 가져오는 데 6분 30초로 총 9분 가까이 걸렸고, 간혹 10분을 넘겨 업데이트가 끝나기 전에 스케줄러가 다시 실행되는 에러가 생겼다. 예전에 파이썬에서 사용했던 pool.map이 생각나 멀티스레딩이나 멀티프로세싱으로 크롤링을 병렬로 진행하는 방법과 insert 시간을 줄이는 방법을 찾아보았다.

 

목차

     


    기존 코드 및 수행 시간

    /**
     * Scheduled job (cron fires every minute) that rebuilds the show-time table when
     * [ChkNeedUpdate.chkUpdateOneHour] reports that an update is due.
     *
     * Baseline version: theaters are crawled one after another on the calling thread and the
     * collected rows are stored through the JPA repository's `saveAll`.
     */
    @Async
    	@Scheduled(cron = "0 0/1 * * * ?")
    	fun chkMovieShowingTime() {
    		// No timestamp stored yet: persist a fixed seed value and continue with it.
    		val time = lastUpdateTimeRepositorySupport.getLastTime(code) ?: run {
    			lastUpdateTimeRepository.save(LastUpdateTime(code, 202302110107))
    			202302110107
    		}
    		if (!ChkNeedUpdate.chkUpdateOneHour(time)) return
    		
    		// The timestamp is written before crawling starts.
    		lastUpdateTimeRepositorySupport.updateLastTime(ChkNeedUpdate.retFormatterTime(), code)
    		tmpShowTimeRepository.truncateTmpShowTime()
    		movieNameInfo = movieDataRepositorySupport.getMovieNameData()
    		
    		val mbTheaters = theaterDataRepositorySupport.getTheaterData("MB")
    		val lcTheaters = theaterDataRepositorySupport.getTheaterData("LC")
    		val cgvTheaters = theaterDataRepositorySupport.getTheaterData("CGV")
    		
    		val showTimeList: MutableList<TmpShowTime> = ArrayList()
    		
    		// Sequential crawl: one theater at a time, chain by chain.
    		val elapsed = measureTimeMillis {
    			mbTheaters.forEach { theater -> showTimeList.addAll(updateMBShowtimes(theater)) }
    			lcTheaters.forEach { theater -> showTimeList.addAll(updateLCShowtimes(theater)) }
    			cgvTheaters.forEach { theater -> showTimeList.addAll(updateCGVShowtimes(theater)) }
    		}
    		
    		val saveTime = measureTimeMillis {
    			tmpShowTimeRepository.saveAll(showTimeList)
    		}
    		
    		tmpShowTimeRepository.chgShowTimeTable()
    		tmpShowTimeRepository.truncateTmpShowTime()
    		
    		println("get data : $elapsed")
    		println("save : $saveTime")
    		println("all : ${elapsed + saveTime}")
    	}

    데이터 수 : 65,875

     

    크롤링 : 12.1분

    저장 : 1.6분

    전체 : 약 13.5분


    batch insert만 적용시

    /**
     * Scheduled job (cron fires every minute) that rebuilds the show-time table when
     * [ChkNeedUpdate.chkUpdateOneHour] reports that an update is due.
     *
     * Batch-insert version: theaters are still crawled one after another, but the collected
     * rows are stored through `tmpShowTimeJdbcTemplateRepository.saveAll`.
     */
    @Async
    	@Scheduled(cron = "0 0/1 * * * ?")
    	fun chkMovieShowingTime() {
    		// No timestamp stored yet: persist a fixed seed value and continue with it.
    		val time = lastUpdateTimeRepositorySupport.getLastTime(code) ?: run {
    			lastUpdateTimeRepository.save(LastUpdateTime(code, 202302110107))
    			202302110107
    		}
    		if (!ChkNeedUpdate.chkUpdateOneHour(time)) return
    		
    		// The timestamp is written before crawling starts.
    		lastUpdateTimeRepositorySupport.updateLastTime(ChkNeedUpdate.retFormatterTime(), code)
    		tmpShowTimeRepository.truncateTmpShowTime()
    		movieNameInfo = movieDataRepositorySupport.getMovieNameData()
    		
    		val mbTheaters = theaterDataRepositorySupport.getTheaterData("MB")
    		val lcTheaters = theaterDataRepositorySupport.getTheaterData("LC")
    		val cgvTheaters = theaterDataRepositorySupport.getTheaterData("CGV")
    		
    		val showTimeList: MutableList<TmpShowTime> = ArrayList()
    		
    		// Sequential crawl: one theater at a time, chain by chain.
    		val elapsed = measureTimeMillis {
    			mbTheaters.forEach { theater -> showTimeList.addAll(updateMBShowtimes(theater)) }
    			lcTheaters.forEach { theater -> showTimeList.addAll(updateLCShowtimes(theater)) }
    			cgvTheaters.forEach { theater -> showTimeList.addAll(updateCGVShowtimes(theater)) }
    		}
    		
    		// JDBC batch insert replaces the JPA saveAll used by the baseline.
    		val saveTime = measureTimeMillis {
    			tmpShowTimeJdbcTemplateRepository.saveAll(showTimeList)
    		}
    		
    		tmpShowTimeRepository.chgShowTimeTable()
    		tmpShowTimeRepository.truncateTmpShowTime()
    		
    		println("get data : $elapsed")
    		println("save : $saveTime")
    		println("all : ${elapsed + saveTime}")
    	}

    데이터 수 : 73,750

    저장 : 약 0.7분

    전체 : 약 13.25분

     


     

    코루틴만 적용시

    /**
     * Scheduled job (cron fires every minute) that rebuilds the show-time table when
     * [ChkNeedUpdate.chkUpdateOneHour] reports that an update is due.
     *
     * Coroutine version: every theater is crawled in its own `async` coroutine on
     * [Dispatchers.IO]; rows are stored through the JPA repository's `saveAll`.
     *
     * The coroutines are started as children of the `runBlocking` scope rather than in
     * detached `CoroutineScope(...)` instances, so a failing crawl cancels the remaining
     * ones instead of leaving them running after this method has already thrown.
     */
    @Async
    	@Scheduled(cron = "0 0/1 * * * ?")
    	fun chkMovieShowingTime() {
    		// No timestamp stored yet: persist a fixed seed value and continue with it.
    		val time = lastUpdateTimeRepositorySupport.getLastTime(code) ?: run {
    			lastUpdateTimeRepository.save(LastUpdateTime(code, 202302110107))
    			202302110107
    		}
    		if (!ChkNeedUpdate.chkUpdateOneHour(time)) return
    		
    		// The timestamp is written before crawling starts.
    		lastUpdateTimeRepositorySupport.updateLastTime(ChkNeedUpdate.retFormatterTime(), code)
    		tmpShowTimeRepository.truncateTmpShowTime()
    		movieNameInfo = movieDataRepositorySupport.getMovieNameData()
    		
    		val mbTheaters = theaterDataRepositorySupport.getTheaterData("MB")
    		val lcTheaters = theaterDataRepositorySupport.getTheaterData("LC")
    		val cgvTheaters = theaterDataRepositorySupport.getTheaterData("CGV")
    		
    		val showTimeList: MutableList<TmpShowTime> = ArrayList()
    		
    		val elapsed = measureTimeMillis {
    			runBlocking {
    				// Start every crawl first, then wait; results keep the MB, LC, CGV order.
    				val crawls: List<Deferred<List<TmpShowTime>>> =
    					mbTheaters.map { theater -> async(Dispatchers.IO) { updateMBShowtimes(theater) } } +
    						lcTheaters.map { theater -> async(Dispatchers.IO) { updateLCShowtimes(theater) } } +
    						cgvTheaters.map { theater -> async(Dispatchers.IO) { updateCGVShowtimes(theater) } }
    				crawls.awaitAll().forEach { rows -> showTimeList.addAll(rows) }
    			}
    		}
    		
    		val saveTime = measureTimeMillis {
    			tmpShowTimeRepository.saveAll(showTimeList)
    		}
    		
    		tmpShowTimeRepository.chgShowTimeTable()
    		tmpShowTimeRepository.truncateTmpShowTime()
    		
    		println("get data : $elapsed")
    		println("save : $saveTime")
    		println("all : ${elapsed + saveTime}")
    	}

    데이터 수 : 82,775

     

    크롤링 : 0.9분

    저장 : 2.4분

    전체 : 3.4분


     

    둘 다 적용시

    TmpShowTimeJdbcTemplateRepository.kt

    /**
     * Writes [TmpShowTime] rows into `tmp_show_time` with `JdbcTemplate.batchUpdate`
     * instead of going through the JPA repository.
     */
    @Repository
    class TmpShowTimeJdbcTemplateRepository {
    	@Autowired
    	lateinit var jdbcTemplate: JdbcTemplate
    	
    	/**
    	 * Inserts [tmpShowTimes] in list order, at most 1000 rows per JDBC batch.
    	 * An empty list issues no batch at all.
    	 */
    	@Transactional
    	fun saveAll(tmpShowTimes: List<TmpShowTime>) {
    		println("start")
    		tmpShowTimes.chunked(1000).forEach { batch -> batchInsert(batch) }
    	}
    	
    	/** Sends one `INSERT IGNORE` batch containing every element of [tmpShowTimes]. */
    	private fun batchInsert(tmpShowTimes: List<TmpShowTime>) {
    		val insertSql =
    			"INSERT IGNORE INTO tmp_show_time(brchen, brchkr, city, date, items, movieen, movie_id, moviekr, movie_theater, play_kind, screenen, screenkr, total_seat, id)" +
    					"VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)"
    		
    		// Binds row i of the batch to the 14 placeholders, in column order.
    		val binder = object : BatchPreparedStatementSetter {
    			override fun setValues(ps: PreparedStatement, i: Int) {
    				val row = tmpShowTimes[i]
    				ps.setString(1, row.brchEN)
    				ps.setString(2, row.brchKR)
    				ps.setString(3, row.city)
    				ps.setString(4, row.date)
    				ps.setString(5, row.items)
    				ps.setString(6, row.movieEN)
    				ps.setString(7, row.movieId)
    				ps.setString(8, row.movieKR)
    				ps.setString(9, row.movieTheater)
    				ps.setString(10, row.playKind)
    				ps.setString(11, row.screenEN)
    				ps.setString(12, row.screenKR)
    				ps.setInt(13, row.totalSeat)
    				ps.setString(14, row.id.toString())
    			}
    			
    			override fun getBatchSize(): Int = tmpShowTimes.size
    		}
    		
    		jdbcTemplate.batchUpdate(insertSql, binder)
    	}
    }
    /**
     * Scheduled job (cron fires every minute) that rebuilds the show-time table when
     * [ChkNeedUpdate.chkUpdateOneHour] reports that an update is due.
     *
     * Combined version: every theater is crawled in its own `async` coroutine on
     * [Dispatchers.IO], and the rows are stored through
     * `tmpShowTimeJdbcTemplateRepository.saveAll`.
     *
     * The coroutines are started as children of the `runBlocking` scope rather than in
     * detached `CoroutineScope(...)` instances, so a failing crawl cancels the remaining
     * ones instead of leaving them running after this method has already thrown.
     */
    @Async
    	@Scheduled(cron = "0 0/1 * * * ?")
    	fun chkMovieShowingTime() {
    		// No timestamp stored yet: persist a fixed seed value and continue with it.
    		val time = lastUpdateTimeRepositorySupport.getLastTime(code) ?: run {
    			lastUpdateTimeRepository.save(LastUpdateTime(code, 202302110107))
    			202302110107
    		}
    		if (!ChkNeedUpdate.chkUpdateOneHour(time)) return
    		
    		// The timestamp is written before crawling starts.
    		lastUpdateTimeRepositorySupport.updateLastTime(ChkNeedUpdate.retFormatterTime(), code)
    		tmpShowTimeRepository.truncateTmpShowTime()
    		movieNameInfo = movieDataRepositorySupport.getMovieNameData()
    		
    		val mbTheaters = theaterDataRepositorySupport.getTheaterData("MB")
    		val lcTheaters = theaterDataRepositorySupport.getTheaterData("LC")
    		val cgvTheaters = theaterDataRepositorySupport.getTheaterData("CGV")
    		
    		val showTimeList: MutableList<TmpShowTime> = ArrayList()
    		
    		val elapsed = measureTimeMillis {
    			runBlocking {
    				// Start every crawl first, then wait; results keep the MB, LC, CGV order.
    				val crawls: List<Deferred<List<TmpShowTime>>> =
    					mbTheaters.map { theater -> async(Dispatchers.IO) { updateMBShowtimes(theater) } } +
    						lcTheaters.map { theater -> async(Dispatchers.IO) { updateLCShowtimes(theater) } } +
    						cgvTheaters.map { theater -> async(Dispatchers.IO) { updateCGVShowtimes(theater) } }
    				crawls.awaitAll().forEach { rows -> showTimeList.addAll(rows) }
    			}
    		}
    		
    		val saveTime = measureTimeMillis {
    			tmpShowTimeJdbcTemplateRepository.saveAll(showTimeList)
    		}
    		
    		tmpShowTimeRepository.chgShowTimeTable()
    		tmpShowTimeRepository.truncateTmpShowTime()
    		
    		println("get data : $elapsed")
    		println("save : $saveTime")
    		println("all : ${elapsed + saveTime}")
    	}

    데이터 수 : 78,900

     

    크롤링 : 0.54분

    저장 : 1.3분

    전체 : 1.86분

     


     

    결론

    코루틴은 신이다! 코루틴과 batch insert를 모두 사용하자. jdbcTemplate을 이용하면 ID 생성 전략(GenerationType)이 IDENTITY인 경우에도 batch insert를 사용할 수 있다고 한다.

     

    jdbcBatchUpdate와 jpa saveall은 약 3만개의 batch insert에서 6초 가량의 차이가 난다.

     

    jpa saveAll의 batch insert를 사용하려면 아래의 옵션을 properties에 추가 해주시면 됩니다.

    spring.datasource.hikari.data-source-properties.rewriteBatchedStatements=true
    spring.datasource.hikari.data-source-properties.useConfigs=maxPerformance
    spring.jpa.properties.hibernate.jdbc.batch_size=300

     

    전체 코드

    더보기
    package com.threemovie.threemovieapi.domain.showtime.scheduler
    
    import com.threemovie.threemovieapi.domain.movie.entity.domain.MovieNameData
    import com.threemovie.threemovieapi.domain.movie.entity.dto.MovieNameInfoVO
    import com.threemovie.threemovieapi.domain.movie.repository.support.MovieDataRepositorySupport
    import com.threemovie.threemovieapi.domain.showtime.entity.domain.TmpShowTime
    import com.threemovie.threemovieapi.domain.showtime.entity.dto.ShowTimeBranchDTO
    import com.threemovie.threemovieapi.domain.showtime.entity.dto.ShowTimeVO
    import com.threemovie.threemovieapi.domain.showtime.repository.TmpShowTimeJdbcTemplateRepository
    import com.threemovie.threemovieapi.domain.showtime.repository.TmpShowTimeRepository
    import com.threemovie.threemovieapi.domain.theater.entity.domain.Theater
    import com.threemovie.threemovieapi.domain.theater.repository.support.TheaterDataRepositorySupport
    import com.threemovie.threemovieapi.global.entity.LastUpdateTime
    import com.threemovie.threemovieapi.global.repository.LastUpdateTimeRepository
    import com.threemovie.threemovieapi.global.repository.support.LastUpdateTimeRepositorySupport
    import com.threemovie.threemovieapi.global.service.CalcSimilarity
    import com.threemovie.threemovieapi.global.service.ChkNeedUpdate
    import kotlinx.coroutines.*
    import org.json.JSONArray
    import org.json.JSONObject
    import org.jsoup.Jsoup
    import org.springframework.scheduling.annotation.Async
    import org.springframework.scheduling.annotation.Scheduled
    import org.springframework.stereotype.Component
    import java.util.regex.Matcher
    import java.util.regex.Pattern
    import kotlin.system.measureTimeMillis
    
    @Component
    class ShowTimeScheduler(
    	val lastUpdateTimeRepositorySupport: LastUpdateTimeRepositorySupport,
    	val theaterDataRepositorySupport: TheaterDataRepositorySupport,
    	val tmpShowTimeRepository: TmpShowTimeRepository,
    	val movieDataRepositorySupport: MovieDataRepositorySupport,
    	val lastUpdateTimeRepository: LastUpdateTimeRepository,
    	val tmpShowTimeJdbcTemplateRepository: TmpShowTimeJdbcTemplateRepository
    ) {
    	// User-Agent header value sent with every crawl request.
    	val userAgent: String =
    		"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/110.0.0.0 Safari/537.36"
    	// Base URLs of the three crawled sites (CGV, Lotte Cinema, Megabox).
    	val CGVurl = "http://www.cgv.co.kr"
    	val LCurl = "http://www.lottecinema.co.kr"
    	val mburl = "https://www.megabox.co.kr"
    	// Cache of resolved movie names keyed by cleaned Korean title; filled by retShowTimeList.
    	// NOTE(review): plain HashMap, while the crawl functions that reach retShowTimeList are
    	// started on Dispatchers.IO — confirm that access to this map is guarded.
    	val nameMap = HashMap<String, MovieNameInfoVO>()
    	// Reference list of known movie names; assigned in chkMovieShowingTime before crawling.
    	lateinit var movieNameInfo: List<MovieNameData>
    	// Removed from Korean titles in retShowTimeList (matches a "[...]" section).
    	// NOTE(review): the character class excludes ')' rather than ']' — confirm this is intended.
    	val square = "\\[[^)]*\\]".toRegex()
    	// Key under which this scheduler's last-update timestamp is stored.
    	val code = "showtime"
    	
    	/**
    	 * Scheduled job (cron fires every minute) that rebuilds the show-time table when
    	 * [ChkNeedUpdate.chkUpdateOneHour] reports that an update is due.
    	 *
    	 * Every theater is crawled in its own `async` coroutine on [Dispatchers.IO], and the
    	 * collected rows are stored through `tmpShowTimeJdbcTemplateRepository.saveAll`.
    	 *
    	 * The coroutines are started as children of the `runBlocking` scope rather than in
    	 * detached `CoroutineScope(...)` instances, so a failing crawl cancels the remaining
    	 * ones instead of leaving them running after this method has already thrown.
    	 */
    	@Async
    	@Scheduled(cron = "0 0/1 * * * ?")
    	fun chkMovieShowingTime() {
    		// No timestamp stored yet: persist a fixed seed value and continue with it.
    		val time = lastUpdateTimeRepositorySupport.getLastTime(code) ?: run {
    			lastUpdateTimeRepository.save(LastUpdateTime(code, 202302110107))
    			202302110107
    		}
    		if (!ChkNeedUpdate.chkUpdateOneHour(time)) return
    		
    		// The timestamp is written before crawling starts.
    		lastUpdateTimeRepositorySupport.updateLastTime(ChkNeedUpdate.retFormatterTime(), code)
    		tmpShowTimeRepository.truncateTmpShowTime()
    		movieNameInfo = movieDataRepositorySupport.getMovieNameData()
    		
    		val mbTheaters = theaterDataRepositorySupport.getTheaterData("MB")
    		val lcTheaters = theaterDataRepositorySupport.getTheaterData("LC")
    		val cgvTheaters = theaterDataRepositorySupport.getTheaterData("CGV")
    		
    		val showTimeList: MutableList<TmpShowTime> = ArrayList()
    		
    		val elapsed = measureTimeMillis {
    			runBlocking {
    				// Start every crawl first, then wait; results keep the MB, LC, CGV order.
    				val crawls: List<Deferred<List<TmpShowTime>>> =
    					mbTheaters.map { theater -> async(Dispatchers.IO) { updateMBShowtimes(theater) } } +
    						lcTheaters.map { theater -> async(Dispatchers.IO) { updateLCShowtimes(theater) } } +
    						cgvTheaters.map { theater -> async(Dispatchers.IO) { updateCGVShowtimes(theater) } }
    				crawls.awaitAll().forEach { rows -> showTimeList.addAll(rows) }
    			}
    		}
    		
    		val saveTime = measureTimeMillis {
    			tmpShowTimeJdbcTemplateRepository.saveAll(showTimeList)
    		}
    		
    		tmpShowTimeRepository.chgShowTimeTable()
    		tmpShowTimeRepository.truncateTmpShowTime()
    		
    		println("get data : $elapsed")
    		println("save : $saveTime")
    		println("all : ${elapsed + saveTime}")
    	}
    	
    	/**
    	 * Asks the Megabox schedule endpoint which play dates exist for one branch.
    	 *
    	 * @param brchNo branch code, sent as both `brchNo` and `brchNo1`
    	 * @param brchName branch name, sent as `brchNm`
    	 * @return the `playDe` value of every entry in `megaMap.movieFormDeList`, in response order
    	 */
    	fun getMBDates(brchNo: String, brchName: String): ArrayList<String> {
    		val form = HashMap<String, String>()
    		form["brchNm"] = brchName
    		form["brchNo"] = brchNo
    		form["brchNo1"] = brchNo
    		form["firstAt"] = "Y"
    		form["masterType"] = "brch"
    		
    		val responseText = Jsoup.connect(mburl + "/on/oh/ohc/Brch/schedulePage.do")
    			.userAgent(userAgent)
    			.data(form)
    			.ignoreContentType(true) // original note: add this when the request errors out
    			.post()
    			.text()
    			.replace("\"\"", "\"") // collapse doubled quotes before JSON parsing
    		
    		val dayEntries = JSONObject(responseText)
    			.getJSONObject("megaMap")
    			.getJSONArray("movieFormDeList")
    		
    		val playDates = ArrayList<String>()
    		(0 until dayEntries.length()).mapTo(playDates) { idx ->
    			dayEntries.getJSONObject(idx).getString("playDe")
    		}
    		return playDates
    	}
    	
    	/**
    	 * Crawls the Megabox schedule of one branch for every date returned by [getMBDates].
    	 *
    	 * Screenings of a date are grouped by (movie title, screen name, play kind); each group
    	 * becomes one [TmpShowTime] via [retShowTimeList], carrying the list of its time slots.
    	 *
    	 * @param theater branch to crawl; its `theaterCode` is used as the Megabox branch number
    	 * @return rows for all dates of this branch, in date order
    	 */
    	fun updateMBShowtimes(
    		theater: Theater
    	): List<TmpShowTime> {
    		val endpoint = mburl + "/on/oh/ohc/Brch/schedulePage.do"
    		val branchNameKR = theater.brchKR
    		val branchCode = theater.theaterCode
    		
    		// One DTO is reused for all dates; only its date field changes per iteration.
    		val branchInfo = ShowTimeBranchDTO("MB")
    		branchInfo.brchKR = branchNameKR
    		branchInfo.brchEN = theater.brchEN
    		branchInfo.city = theater.city
    		
    		val collected = ArrayList<TmpShowTime>()
    		for (playDate in getMBDates(branchCode, branchNameKR)) {
    			branchInfo.date = chgStrtoDatestr(playDate)
    			
    			val form = HashMap<String, String>()
    			form["brchNm"] = branchNameKR
    			form["brchNo"] = branchCode
    			form["brchNo1"] = branchCode
    			form["masterType"] = "brch"
    			form["playDe"] = playDate
    			form["firstAt"] = "N"
    			
    			val responseText = Jsoup.connect(endpoint)
    				.userAgent(userAgent)
    				.data(form)
    				.ignoreContentType(true) // original note: add this when the request errors out
    				.post()
    				.body()
    				.text()
    			val schedules = JSONObject(responseText).getJSONObject("megaMap").getJSONArray("movieFormList")
    			
    			val grouped = HashMap<Triple<String, String, String>, ShowTimeVO>()
    			for (idx in 0 until schedules.length()) {
    				val schedule = schedules.getJSONObject(idx)
    				val totalSeat = schedule.getInt("totSeatCnt")
    				val scheduleNo = schedule.getString("playSchdlNo")
    				val movieKR = schedule.getString("rpstMovieNm")
    				val playKind = schedule.getString("playKindNm")
    				val screenKR = schedule.getString("theabExpoNm")
    				val screenEN = schedule.getString("theabEngNm")
    				val ticketPage = "https://www.megabox.co.kr/bookingByPlaySchdlNo?playSchdlNo=${scheduleNo}"
    				
    				val restSeat = schedule.getInt("restSeatCnt").toString()
    				val startTime = schedule.getString("playStartTime")
    				val endTime = schedule.getString("playEndTime")
    				
    				val slot = HashMap<String, String>()
    				slot["StartTime"] = startTime
    				slot["EndTime"] = endTime
    				slot["RestSeat"] = restSeat
    				slot["TicketPage"] = ticketPage
    				
    				// First slot of a group creates the VO; later slots are appended to it.
    				val groupKey = Triple(movieKR, screenKR, playKind)
    				val existing = grouped[groupKey]
    				if (existing != null) {
    					existing.items?.add(slot)
    				} else {
    					grouped[groupKey] = ShowTimeVO(screenEN, totalSeat, arrayListOf(slot))
    				}
    			}
    			
    			collected.addAll(retShowTimeList(grouped, branchInfo))
    		}
    		return collected
    	}
    	
    	/**
    	 * Builds the Lotte Cinema ticketing URL for one screening.
    	 *
    	 * @param data screening object; `ScreenID`, `CinemaID`, `MovieCode`, `PlayDt` and
    	 *   `StartTime` are read from it
    	 * @return [LCurl] followed by the `/NLCHS/ticketing` path and its `link_*` query parameters
    	 */
    	fun getLCTicketAddr(data: JSONObject): String {
    		// ScreenID and CinemaID are read with get(): any JSON value type is accepted and stringified.
    		val screenID = "${data.get("ScreenID")}"
    		val cinemaID = "${data.get("CinemaID")}"
    		val movieCd = data.getString("MovieCode")
    		val date = data.getString("PlayDt")
    		val startTime = data.getString("StartTime")
    		
    		return LCurl + "/NLCHS/ticketing?link_screenId=${screenID}&link_cinemaCode=${cinemaID}&link_movieCd=${movieCd}&link_date=${date}&link_time=${startTime}&link_channelCode=naver"
    	}
    	
    	/**
    	 * Asks the Lotte Cinema ticketing endpoint (`GetInvisibleMoviePlayInfo`) for the play
    	 * dates of one cinema.
    	 *
    	 * @param theatercode cinema code, sent as `cinemaList`
    	 * @return for every entry of `PlayDates.Items`, the part of `PlayDate` before the first space
    	 */
    	fun getLCDates(theatercode: String): ArrayList<String> {
    		val request = HashMap<String, Any>()
    		request["MethodName"] = "GetInvisibleMoviePlayInfo"
    		request["channelType"] = "HO"
    		request["osType"] = "W"
    		request["osVersion"] = userAgent
    		request["cinemaList"] = theatercode
    		request["movieCd"] = ""
    		// NOTE(review): fixed date literal — confirm the endpoint ignores it when listing dates.
    		request["playDt"] = "2023-03-03"
    		
    		val responseText = Jsoup.connect(LCurl + "/LCWS/Ticketing/TicketingData.aspx")
    			.userAgent(userAgent)
    			.data("ParamList", JSONObject(request).toString())
    			.post()
    			.body()
    			.text()
    		val entries = JSONObject(responseText).getJSONObject("PlayDates").getJSONArray("Items")
    		
    		val dates = ArrayList<String>()
    		for (idx in 0 until entries.length()) {
    			// Keep only the text in front of the first space.
    			dates.add(entries.getJSONObject(idx).getString("PlayDate").substringBefore(" "))
    		}
    		return dates
    	}
    	
    	/**
    	 * Crawls the Lotte Cinema schedule (`GetPlaySequence`) of one cinema for every date
    	 * returned by [getLCDates].
    	 *
    	 * Screenings of a date are grouped by (movie title, screen name, play kind); each group
    	 * becomes one [TmpShowTime] via [retShowTimeList], carrying the list of its time slots.
    	 * The play kind is the film name plus, for translation codes other than 900, the
    	 * translation label from the response header in parentheses.
    	 *
    	 * @param theater cinema to crawl; its `theaterCode` is sent as `cinemaID`
    	 * @return rows for all dates of this cinema, in date order
    	 */
    	fun updateLCShowtimes(
    		theater: Theater
    	): List<TmpShowTime> {
    		val showTimeBranch = ShowTimeBranchDTO("LC")
    		val showTimeList = ArrayList<TmpShowTime>()
    		val brchKR = theater.brchKR
    		val brchEN = theater.brchEN
    		val city = theater.city
    		val cinemaCode = theater.theaterCode
    		val datelist = getLCDates(cinemaCode)
    		val url: String = LCurl + "/LCWS/Ticketing/TicketingData.aspx"
    		
    		// One DTO is reused for all dates; only its date field changes per iteration.
    		showTimeBranch.city = city
    		showTimeBranch.brchKR = brchKR
    		showTimeBranch.brchEN = brchEN
    		for (date in datelist) {
    			showTimeBranch.date = date
    			val paramlist = HashMap<String, String>()
    			paramlist["MethodName"] = "GetPlaySequence"
    			paramlist["channelType"] = "HO"
    			paramlist["osType"] = "W"
    			paramlist["osVersion"] = userAgent
    			paramlist["playDate"] = date
    			paramlist["cinemaID"] = cinemaCode
    			paramlist["representationMovieCode"] = ""
    			val conn = Jsoup.connect(url)
    				.userAgent(userAgent)
    				.data("ParamList", JSONObject(paramlist).toString())
    			val doc = conn.post().body().text()
    			val data = JSONObject(doc)
    			val playSeqs = data.getJSONObject("PlaySeqs").getJSONArray("Items")
    			val playHeaders = data.getJSONObject("PlaySeqsHeader").getJSONArray("Items")
    			
    			// Translation code -> Korean label, taken from the header section of the response.
    			val translation = HashMap<Int, String>()
    			for (i in 0 until playHeaders.length()) {
    				val headerdata = playHeaders.getJSONObject(i)
    				val translationCode = headerdata.getInt("TranslationDivisionCode")
    				val translationName = headerdata.getString("TranslationDivisionNameKR")
    				translation[translationCode] = translationName
    			}
    			
    			val dtos = HashMap<Triple<String, String, String>, ShowTimeVO>()
    			for (i in 0 until playSeqs.length()) {
    				val playdata = playSeqs.getJSONObject(i)
    				
    				val movieKR = playdata.getString("MovieNameKR")
    				val screenDivisionCode = playdata.getInt("ScreenDivisionCode")
    				var screenDivisionKR = ""
    				var screenDivisionEN = ""
    				// Prefix the screen name with its division name unless the division code is 100
    				// or the division name already equals the screen name.
    				if (screenDivisionCode != 100 && playdata.getString("ScreenDivisionNameKR") != playdata.get("ScreenNameKR")) {
    					screenDivisionKR = playdata.getString("ScreenDivisionNameKR") + " "
    					screenDivisionEN = playdata.getString("ScreenDivisionNameUS") + " "
    				}
    				val screenKR = screenDivisionKR + playdata.get("ScreenNameKR")
    				val screenEN = screenDivisionEN + playdata.get("ScreenNameUS")
    				
    				val startTime = playdata.getString("StartTime")
    				val endTime = playdata.getString("EndTime")
    				val totalSeat = playdata.getInt("TotalSeatCount")
    				// NOTE(review): RestSeat is filled from BookingSeatCount — confirm this field
    				// really holds the remaining seats.
    				val restSeat = playdata.getInt("BookingSeatCount").toString()
    				val translationCode = playdata.getInt("TranslationDivisionCode")
    				
    				var playKind = playdata.getString("FilmNameKR")
    				if (translationCode != 900) {
    					// Previously the concatenation result was discarded, so the label was never
    					// appended. Codes without a header label are left without a suffix.
    					translation[translationCode]?.let { label -> playKind += "($label)" }
    				}
    				
    				val ticketPage = getLCTicketAddr(playdata)
    				
    				val showTimeKey = Triple(movieKR, screenKR, playKind)
    				val item = HashMap<String, String>()
    				item["StartTime"] = startTime
    				item["EndTime"] = endTime
    				item["RestSeat"] = restSeat
    				item["TicketPage"] = ticketPage
    				
    				// First slot of a group creates the VO; later slots are appended to it.
    				if (dtos[showTimeKey] == null) {
    					val items = ArrayList<HashMap<String, String>>()
    					items.add(item)
    					dtos[showTimeKey] = ShowTimeVO(screenEN, totalSeat, items)
    				} else {
    					dtos[showTimeKey]?.items?.add(item)
    				}
    			}
    			
    			showTimeList.addAll(retShowTimeList(dtos, showTimeBranch))
    		}
    		return showTimeList
    	}
    	
    	/**
    	 * Loads the CGV show-time iframe of one theater and extracts the selectable dates.
    	 *
    	 * Each element with class `day` is expected to contain a link whose `href` carries a
    	 * `date=` query parameter; its value is collected. Elements without a link, or whose
    	 * link has no `date=` parameter, are skipped.
    	 *
    	 * @param theatercode CGV theater code, sent as the `theatercode` query parameter
    	 * @return the `date` values in page order
    	 */
    	fun getCGVDates(theatercode: String): ArrayList<String> {
    		val url: String =
    			CGVurl + "/common/showtimes/iframeTheater.aspx?theatercode=${theatercode}"
    		val conn = Jsoup.connect(url)
    			.userAgent(userAgent)
    			.referrer(
    				CGVurl
    			)
    			.header(
    				"Accept",
    				"text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7"
    			)
    			.header("Accept-Language", "ko-KR,ko;q=0.9,en-US;q=0.8,en;q=0.7")
    			.header(
    				"Cookie",
    				"ASP.NET_SessionId=test;"
    			)
    		val doc = conn.get()
    		
    		val days = doc.getElementsByClass("day")
    		val datelist = ArrayList<String>()
    		// Compiled once for the whole page instead of once per element.
    		val pattern: Pattern = Pattern.compile(".*date=([^&]+).*")
    		
    		for (day in days) {
    			// A day cell without a link has no date to offer; skip it rather than fail the crawl.
    			val anchor = day.getElementsByTag("a").firstOrNull() ?: continue
    			val matcher: Matcher = pattern.matcher(anchor.attr("href"))
    			if (matcher.find()) {
    				datelist.add(matcher.group(1))
    			}
    		}
    		
    		return datelist
    	}
    	
    	/**
    	 * Formats the first four characters of [str] as `HH:mm` by inserting a colon after the
    	 * second character. Throws if [str] has fewer than four characters.
    	 */
    	fun chgStrtoTimestr(str: String): String {
    		val hours = str.substring(0, 2)
    		val minutes = str.substring(2, 4)
    		return hours + ":" + minutes
    	}
    	
    	/**
    	 * Formats the first eight characters of [str] (`yyyyMMdd`) as `yyyy-MM-dd`.
    	 * Throws if [str] has fewer than eight characters.
    	 */
    	fun chgStrtoDatestr(str: String): String {
    		val year = str.substring(0, 4)
    		val month = str.substring(4, 6)
    		val day = str.substring(6, 8)
    		return listOf(year, month, day).joinToString("-")
    	}
    	
    	/**
    	 * Derives an English screen name from a Korean one: when the name contains "관", every
    	 * "관" is removed and the result is prefixed with "CINEMA "; otherwise the name is
    	 * returned unchanged.
    	 */
    	fun chgScreenKRtoEN(screenKR: String): String =
    		if (screenKR.contains("관")) {
    			"CINEMA ${screenKR.replace("관", "")}"
    		} else {
    			screenKR
    		}
    	
    	/**
    	 * Crawls the CGV show-time iframe of one theater for every date returned by [getCGVDates].
    	 *
    	 * For each movie (`col-times`) and each hall (`type-hall`) the bookable time slots are
    	 * read from the timetable links. Slots are grouped by (movie title, screen name, play
    	 * kind); each group becomes one [TmpShowTime] via [retShowTimeList].
    	 *
    	 * @param theater theater to crawl; its `theaterCode` is sent as `theatercode`
    	 * @return rows for all dates of this theater, in date order
    	 */
    	fun updateCGVShowtimes(
    		theater: Theater
    	): List<TmpShowTime> {
    		val theaterCode = theater.theaterCode
    		val nonDigits = "[^0-9]+".toRegex()
    		val collected = ArrayList<TmpShowTime>()
    		
    		// One DTO is reused for all dates; only its date field changes per iteration.
    		val branchInfo = ShowTimeBranchDTO("CGV")
    		val playDates = getCGVDates(theaterCode)
    		branchInfo.city = theater.city
    		branchInfo.brchKR = theater.brchKR
    		branchInfo.brchEN = theater.brchEN
    		
    		for (playDate in playDates) {
    			branchInfo.date = chgStrtoDatestr(playDate)
    			
    			val page = Jsoup.connect(CGVurl + "/common/showtimes/iframeTheater.aspx?theatercode=${theaterCode}&date=${playDate}")
    				.userAgent(userAgent)
    				.referrer(CGVurl)
    				.header(
    					"Accept",
    					"text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7"
    				)
    				.header("Accept-Language", "ko-KR,ko;q=0.9,en-US;q=0.8,en;q=0.7")
    				.header("Cookie", "ASP.NET_SessionId=test;")
    				.get()
    			
    			val grouped = HashMap<Triple<String, String, String>, ShowTimeVO>()
    			for (movieBlock in page.getElementsByClass("col-times")) {
    				val movieKR = movieBlock.getElementsByClass("info-movie")[0]
    					.getElementsByTag("a")[0]
    					.text()
    				
    				for (hallBlock in movieBlock.getElementsByClass("type-hall")) {
    					// The hall info list holds, in order: play kind, screen name, seat-count text.
    					val hallFacts = hallBlock.getElementsByClass("info-hall")[0].getElementsByTag("li")
    					val playKind = hallFacts[0].text()
    					val screenKR = hallFacts[1].text()
    					val screenEN = chgScreenKRtoEN(screenKR)
    					val totalSeat = hallFacts[2].text().replace(nonDigits, "").toInt()
    					
    					val slotCells = hallBlock.getElementsByClass("info-timetable")[0].getElementsByTag("li")
    					for (slotCell in slotCells) {
    						// Cells without a link, or whose link points to "/", are skipped.
    						val link = slotCell.getElementsByTag("a").firstOrNull() ?: continue
    						if (link.attr("href") == "/") continue
    						
    						val startTime = chgStrtoTimestr(link.attr("data-playstarttime"))
    						val endTime = chgStrtoTimestr(link.attr("data-playendtime"))
    						val ticketPage = CGVurl + link.attr("href")
    						val restSeat = link.attr("data-seatremaincnt")
    						
    						val slot = HashMap<String, String>()
    						slot["StartTime"] = startTime
    						slot["EndTime"] = endTime
    						slot["RestSeat"] = restSeat
    						slot["TicketPage"] = ticketPage
    						
    						// First slot of a group creates the VO; later slots are appended to it.
    						val groupKey = Triple(movieKR, screenKR, playKind)
    						val existing = grouped[groupKey]
    						if (existing != null) {
    							existing.items?.add(slot)
    						} else {
    							grouped[groupKey] = ShowTimeVO(screenEN, totalSeat, arrayListOf(slot))
    						}
    					}
    				}
    			}
    			
    			collected.addAll(retShowTimeList(grouped, branchInfo))
    		}
    		return collected
    	}
    	
    	/**
    	 * Converts the grouped screenings of one branch and date into [TmpShowTime] rows.
    	 *
    	 * Each Korean title is stripped of the text matched by [square] and trimmed, then
    	 * resolved against [movieNameInfo]; the resolved name is cached in [nameMap]. The crawl
    	 * functions that call this method run concurrently on `Dispatchers.IO`, so every access
    	 * to the shared `HashMap` happens under a lock on [nameMap].
    	 *
    	 * @param dtos screenings keyed by (movie title, screen name, play kind)
    	 * @param showTimeBranch chain, city, branch names and date shared by all rows
    	 * @return one row per entry of [dtos], in the map's iteration order
    	 */
    	fun retShowTimeList(
    		dtos: Map<Triple<String, String, String>, ShowTimeVO>,
    		showTimeBranch: ShowTimeBranchDTO
    	): List<TmpShowTime> {
    		val ret = ArrayList<TmpShowTime>()
    		
    		val (movieTheater, city, brchKR, brchEN, date) = showTimeBranch
    		for (dto in dtos) {
    			val (rawMovieKR, screenKR, playKind) = dto.key
    			val (screenEN, totalSeat, items) = dto.value
    			val movieKR = rawMovieKR.replace(square, "").trim()
    			
    			// Look up or compute under one lock so concurrent crawls never touch the
    			// HashMap at the same time. Declared nullable so the fallbacks below stay
    			// exactly as they were.
    			val name: MovieNameInfoVO? = synchronized(nameMap) {
    				nameMap.getOrPut(movieKR) { findBestMovieName(movieKR) }
    			}
    			
    			val showTime = TmpShowTime(
    				name?.movieId ?: movieKR,
    				movieTheater,
    				city,
    				brchKR,
    				brchEN,
    				name?.nameKR ?: movieKR,
    				name?.nameEN ?: movieKR,
    				screenKR,
    				screenEN,
    				date,
    				totalSeat,
    				playKind,
    				JSONArray(items).toString()
    			)
    			
    			ret.add(showTime)
    		}
    		
    		return ret
    	}
    	
    	/**
    	 * Scans [movieNameInfo] for the entry whose Korean name is most similar to [movieKR].
    	 * Only similarities above 0.7 count; when nothing qualifies, a default
    	 * [MovieNameInfoVO] is returned.
    	 */
    	private fun findBestMovieName(movieKR: String): MovieNameInfoVO {
    		val movieName = MovieNameInfoVO()
    		for (nameInfo in movieNameInfo) {
    			val similarity = CalcSimilarity.calcSimilarity(nameInfo.nameKR, movieKR)
    			
    			if (similarity > 0.7 && movieName.similarity < similarity) {
    				movieName.movieId = nameInfo.movieId
    				movieName.nameKR = nameInfo.nameKR.toString()
    				movieName.nameEN = nameInfo.nameEN
    				movieName.similarity = similarity
    			}
    		}
    		return movieName
    	}
    }

     


    참고

    더보기

    https://sandn.tistory.com/105 - Coroutine + Jsoup + MVVM 을 이용한 안드로이드 뉴스 앱 만들기

     

    728x90

    댓글