mercredi 18 février 2015

How to pipe image from request to pdfkit in node.js?

First off, I am a total newbie to both JavaScript and Node.js, so sorry if my question is stupid.


I am trying to scrape text and images off a website and export it to a pdf using request, cheerio and pdfkit, but I'm having problems.


I am able to scrape the images and save them locally using this:



// Load `body` into cheerio so the markup can be queried with selectors.
var $ = cheerio.load(body);

// Collect the absolute URL of every <img> found inside #mediatab1.
$("#mediatab1 img").each(function () {
    var src = $(this).attr('src');
    images.push('http://WWW.WEBSITE.no' + src);
});

// Stream every collected image to images/<index>.jpg.
images.forEach(function (imageUrl, index) {
    var download = request(imageUrl);
    download.pipe(fs.createWriteStream('images/' + index + '.jpg'));
});


BUT! Here's the problem:


Attempt 1: When I try to write the files to the PDF using



// Embed the locally saved image file in the PDF.
// NOTE(review): the file is reported as 0 bytes when this runs, so the download
// has presumably not finished writing yet — confirm before embedding.
doc.image('images/0.jpg');


all I get is



Error: Unknown image format.
at Function.PDFImage.open (C:\nodejs\node_modules\pdfkit\js\im
age.js:41:15)
at PDFDocument.module.exports.image (C:\nodejs\node_modules\pd
fkit\js\mixins\images.js:27:26)
at Request._callback (C:\nodejs\prosjekt.js:29:6)
at Request.self.callback (C:\nodejs\node_modules\request\reque
st.js:344:22)
at Request.emit (events.js:98:17)
at Request.<anonymous> (C:\nodejs\node_modules\request\request
.js:1239:14)
at Request.emit (events.js:117:20)
at IncomingMessage.<anonymous> (C:\nodejs\node_modules\request
\request.js:1187:12)
at IncomingMessage.emit (events.js:117:20)
at _stream_readable.js:944:16


0.jpg is 0 bytes, so I suspect there is a timing issue here?


Attempt 2:


I tried to use .pipe instead of saving locally:



// NOTE(review): doc.image() is called here with the URL string images[0], and its
// return value is what gets handed to pipe(); the ENOENT error reported for this
// line shows the URL being treated as a local file path.
request(images[i]).pipe(doc.image(images[0]));


But all I get is:



"Error: ENOENT, no such file or directory 'C:\nodejs\http:\http://ift.tt/1FsvFAo
13'


Any idea how to fix this or to solve the problem in any other way?


Here's the whole script:



// Scrapes a project page (title, body text, images) and writes it to
// pdf/output.pdf. Images are downloaded into memory and embedded only once
// every download has completed, so the PDF never sees a half-written file.
var request = require('request'),
    cheerio = require('cheerio'),
    PDFDocument = require('pdfkit'),
    doc = new PDFDocument(),
    fs = require('fs'),
    prompt = require('prompt'),
    // Declared as part of the `var` chain so it is no longer an implicit global.
    bilder = [];

/**
 * Downloads every URL in `urls` as a raw Buffer.
 *
 * Each image is also saved to images/<index>.jpg, as the script did before;
 * a failed save is logged but does not abort the run.
 *
 * @param {string[]} urls - Absolute image URLs.
 * @param {function(Error, Buffer[])} done - Called exactly once: with the
 *   first error encountered, or with the buffers in the same order as `urls`.
 */
function downloadImages(urls, done) {
    var buffers = [];
    var remaining = urls.length;
    var failed = false;

    if (remaining === 0) {
        done(null, buffers);
        return;
    }

    urls.forEach(function (url, index) {
        // encoding: null makes request hand back the body as a Buffer
        // instead of decoding the binary data as a UTF-8 string.
        request({url: url, encoding: null}, function (err, resp, body) {
            if (failed) {
                return;
            }
            if (err || resp.statusCode !== 200) {
                failed = true;
                done(err || new Error('Could not download ' + url + ' (HTTP ' + resp.statusCode + ')'));
                return;
            }

            buffers[index] = body;
            fs.writeFile('images/' + index + '.jpg', body, function (writeErr) {
                if (writeErr) {
                    console.error(writeErr);
                }
            });

            remaining -= 1;
            if (remaining === 0) {
                done(null, buffers);
            }
        });
    });
}

prompt.start();
prompt.get(['prosjekturl'], function (err, result) {
    if (err) {
        console.error(err);
        return;
    }

    request({url: 'http://ift.tt/1LaAVXU' + result.prosjekturl, encoding: null}, function (err, resp, body) {
        if (err || resp.statusCode !== 200) {
            // Nothing was scraped, so do not create or finalize a PDF.
            console.error(err || new Error('Could not fetch project page (HTTP ' + resp.statusCode + ')'));
            return;
        }

        var $ = cheerio.load(body);

        $("#mediatab1 img").each(function () {
            var bilde = 'http://www.WEBSITE.no' + $(this).attr('src');
            bilder.push(bilde);
        });
        console.log(bilder);

        // Attach the output stream exactly once; piping again for every
        // heading/paragraph would open several write streams on the same file.
        doc.pipe(fs.createWriteStream('pdf/output.pdf'));
        doc.font('fonts/FONT-Regular.ttf');

        $("#MiddleRightContainer h1").each(function () {
            var tittel = $(this).text();
            console.log(tittel);
            doc.fontSize(32);
            doc.text(tittel);
        });

        $("#MiddleRightContainer .user-content p").each(function () {
            var tekst = $(this).text();
            console.log(tekst);
            doc.fontSize(12);
            doc.text(tekst);
        });

        $("#RightSidebar div.box2").each(function () {
            var fakta = $(this).text();
            console.log(fakta);
        });

        // Embed the images and finalize the document only after all downloads
        // have finished; ending earlier would close the PDF without them.
        downloadImages(bilder, function (imageErr, buffers) {
            if (imageErr) {
                console.error(imageErr);
            } else {
                buffers.forEach(function (buffer, index) {
                    try {
                        doc.image(buffer);
                    } catch (e) {
                        // doc.image throws on formats it cannot decode; skip
                        // that image rather than losing the whole PDF.
                        console.error('Skipping image ' + index + ': ' + e.message);
                    }
                });
            }
            doc.end();
        });
    });
});

Aucun commentaire:

Enregistrer un commentaire