How to prevent libcurl (C++) from downloading binary data?
I am making a web crawler and I have the following code, but the problem is that it also downloads binary data, and I don't want that to happen. How do I prevent it?
// libcurl write callback: appends one received chunk to a std::string.
//
// contents: pointer to the received bytes (not null-terminated).
// size, nmemb: element size and element count; the chunk is size * nmemb bytes.
// userp: pointer to the destination std::string.
//
// Returns the chunk size in bytes. The size is returned even when nothing was
// stored, so a missing buffer never makes the return value differ from the
// number of bytes offered.
size_t HTML::WriteMemoryCallback(void *contents, size_t size, size_t nmemb, void *userp) {
    const size_t realsize = size * nmemb;
    // Both pointers are required. The previous `||` let a null `userp` through
    // whenever `contents` was non-null, which then dereferenced a null pointer.
    if (contents != nullptr && userp != nullptr) {
        auto *str = static_cast<std::string *>(userp);
        // append() copies exactly `realsize` bytes (embedded '\0' included) and
        // grows the string geometrically, replacing the per-byte push_back loop.
        str->append(static_cast<const char *>(contents), realsize);
    }
    return realsize;
}
// Downloads `url` with a libcurl easy handle and returns the response body.
//
// Redirects are followed, the request is sent with USER_AGENT, and the body is
// accumulated in a local std::string through WriteMemoryCallback.
//
// Returns an HTML_CODE with `url` and `content` set on success. Returns {"",""}
// when the handle cannot be created or the transfer fails; the failure is also
// reported on stdout/stderr.
HTML_CODE HTML::get_html(std::string url) {
    CURL *curl_handle = curl_easy_init();
    if (!curl_handle) {
        std::cout<<"Error: Couldn't create a curl instance"<<std::endl;
        return {"",""};
    }

    std::string chunk;
    curl_easy_setopt(curl_handle, CURLOPT_URL, url.c_str());
    curl_easy_setopt(curl_handle, CURLOPT_FOLLOWLOCATION, 1L);
    curl_easy_setopt(curl_handle, CURLOPT_WRITEFUNCTION, WriteMemoryCallback);
    curl_easy_setopt(curl_handle, CURLOPT_WRITEDATA, static_cast<void *>(&chunk));
    curl_easy_setopt(curl_handle, CURLOPT_USERAGENT, USER_AGENT);

    const CURLcode res = curl_easy_perform(curl_handle);
    // Release the handle on every path. The earlier version returned from the
    // failure branch before reaching curl_easy_cleanup and leaked the handle.
    curl_easy_cleanup(curl_handle);

    if (res != CURLE_OK) {
        std::cout<<"Can't get html content from "<<url<<"\n";
        fprintf(stderr, "error: %s\n", curl_easy_strerror(res));
        return {"",""};
    }
    return {.url=url,.content=chunk};
}
Things I have tried:-
- Check if the data has a null terminator
- Check if the char is an ASCII letter (it won't work with other languages)
Sources
This article follows the attribution requirements of Stack Overflow and is licensed under CC BY-SA 3.0.
Source: Stack Overflow
| Solution | Source |
|---|---|
