runBlocking {
bookLinks.mapIndexed { ranking, bookLink ->
val job = async { scrapeBookData(browser, bookLink, ranking) }
val result = job.await()
if (result != null) {
bestsellers.add(result)
}
}
}
private suspend fun scrapeBookData(browser: Browser, bookUrl: String, ranking: Int): BookDTO? {
val page = browser.newPage()
page.navigate(bookUrl, Page.NavigateOptions().setWaitUntil(WaitUntilState.DOMCONTENTLOADED))
printWithThread("${bookUrl}에 접근 완료")
delay(3000)
val data = page.evaluate(
""" () => JSON.stringify({
title: document.querySelector('.prod_title')?.innerText?.trim() || '',
author: document.querySelector('.author')?.innerText?.trim() || '',
isbn: document.querySelector('#scrollSpyProdInfo .product_detail_area.basic_info table tbody tr:nth-child(1) td')?.innerText?.trim() || '',
description: document.querySelector('.intro_bottom')?.innerText?.trim() || '',
image: document.querySelector('.portrait_img_box img')?.getAttribute('src') || ''
}) """
).toString()
val type = object : TypeToken<Map<String, String>>() {}.type
val json: Map<String, String> = Gson().fromJson(data, type)
page.close()
printWithThread("${bookUrl}의 데이터 파싱 완료")
if (json.values.all { it.isBlank() }) {
return null
}
return BookDTO(
id = 0L,
title = json["title"] ?: "",
author = json["author"] ?: "",
description = json["description"] ?: "",
image = json["image"] ?: "",
isbn = json["isbn"] ?: "",
ranking = ranking + 1,
favoriteCount = 0
)
}
我预期如果我将 scrapeBookData(一个挂起函数)延迟 3 秒,协程会在延迟期间切换并再次执行 scrapeBookData。我预期在重复执行 scrapeBookData 3 秒后,第一个协程将解析网络响应已完成的页面。然而,协程是同步运行的。
[http-nio-8080-exec-2 @coroutine#2] https:S000215819502에 접근 완료
[http-nio-8080-exec-2 @coroutine#2] https:S000215819502의 데이터 파싱 완료
[http-nio-8080-exec-2 @coroutine#3] https:S000215150862에 접근 완료
[http-nio-8080-exec-2 @coroutine#3] https:S000215150862의 데이터 파싱 완료
[http-nio-8080-exec-2 @coroutine#4] https:S000215787651에 접근 완료