2017-08-25 3 views
0

Ich möchte HTML aus einer Quelle (Link oder Datei, ...) laden und Werte daraus auslesen. Das HTML hat folgendes Format (Thema: HTML-Manipulation mit Node JS):

<!doctype html> 
<html> 
<body> 
    <main> 
    <section id="serp"> 
     <div> 
     <article>a</article> 
     <article>b</article> 
     <article>c</article> 
     <article>d</article> 
     </div> 
    </section> 
    </main> 
</body> 
</html> 

Zuerst habe ich cheerio verwendet. Gemäß der Dokumentation habe ich Folgendes geschrieben:

// Parse the HTML string with cheerio, then select the child elements of the
// <div> that sits inside the element with id "serp".
const cheerio = require('cheerio');
const $ = cheerio.load(myhtml);
const serpDiv = $('#serp div');
const content = serpDiv.children();
console.log(content); // the asker reports that this prints null

Auf die gleiche Weise habe ich auch x-ray und jsdom verwendet, aber alle geben null aus.

+0

Was gibt `console.log(myhtml)` aus, kurz bevor es in Cheerio geladen wird? –

+0

Es ist eine Zeichenfolge:

a
b
c
d
@JeremyThille – Ahmad

+0

Wenn das HTML tatsächlich verfügbar ist und in Cheerio geladen wird, gibt es keinen Grund, warum der Selektor null zurückgeben sollte. Das Problem liegt woanders. Hast du _tatsächlich_ versucht, es zu protokollieren, oder hast du das nur gesagt, weil du denkst, dass das protokolliert wird? –

Antwort

0

Ich habe Folgendes getan:

// Minimal reproduction: parse a fixed HTML document with cheerio and log the
// children of the <div> inside #serp.
const cheerio = require('cheerio');

// Sample document from the question, kept verbatim.
const myhtml = `<!doctype html> 
<html> 
<body> 
    <main> 
    <section id="serp"> 
     <div> 
     <article>a</article> 
     <article>b</article> 
     <article>c</article> 
     <article>d</article> 
     </div> 
    </section> 
    </main> 
</body> 
</html>`;

const $ = cheerio.load(myhtml);

// Select the <div> under #serp first, then take its child elements.
const serpDiv = $('#serp div');
const content = serpDiv.children();

// Logs the whole selection object (four <article> nodes in the recorded output).
console.log(content);

// In the recorded output this prints only "a", i.e. the markup of the first
// matched <article>, not of all four.
const firstHtml = content.html();
console.log(`html: ${firstHtml}`);

Das gibt Folgendes auf der Konsole aus:

initialize { 
    '0': 
    { type: 'tag', 
    name: 'article', 
    namespace: 'http://www.w3.org/1999/xhtml', 
    attribs: {}, 
    'x-attribsNamespace': {}, 
    'x-attribsPrefix': {}, 
    children: [ [Object] ], 
    parent: 
     { type: 'tag', 
     name: 'div', 
     namespace: 'http://www.w3.org/1999/xhtml', 
     attribs: {}, 
     'x-attribsNamespace': {}, 
     'x-attribsPrefix': {}, 
     children: [Object], 
     parent: [Object], 
     prev: [Object], 
     next: [Object] }, 
    prev: 
     { type: 'text', 
     data: '\n  ', 
     parent: [Object], 
     prev: null, 
     next: [Circular] }, 
    next: 
     { type: 'text', 
     data: '\n  ', 
     parent: [Object], 
     prev: [Circular], 
     next: [Object] } }, 
    '1': 
    { type: 'tag', 
    name: 'article', 
    namespace: 'http://www.w3.org/1999/xhtml', 
    attribs: {}, 
    'x-attribsNamespace': {}, 
    'x-attribsPrefix': {}, 
    children: [ [Object] ], 
    parent: 
     { type: 'tag', 
     name: 'div', 
     namespace: 'http://www.w3.org/1999/xhtml', 
     attribs: {}, 
     'x-attribsNamespace': {}, 
     'x-attribsPrefix': {}, 
     children: [Object], 
     parent: [Object], 
     prev: [Object], 
     next: [Object] }, 
    prev: 
     { type: 'text', 
     data: '\n  ', 
     parent: [Object], 
     prev: [Object], 
     next: [Circular] }, 
    next: 
     { type: 'text', 
     data: '\n  ', 
     parent: [Object], 
     prev: [Circular], 
     next: [Object] } }, 
    '2': 
    { type: 'tag', 
    name: 'article', 
    namespace: 'http://www.w3.org/1999/xhtml', 
    attribs: {}, 
    'x-attribsNamespace': {}, 
    'x-attribsPrefix': {}, 
    children: [ [Object] ], 
    parent: 
     { type: 'tag', 
     name: 'div', 
     namespace: 'http://www.w3.org/1999/xhtml', 
     attribs: {}, 
     'x-attribsNamespace': {}, 
     'x-attribsPrefix': {}, 
     children: [Object], 
     parent: [Object], 
     prev: [Object], 
     next: [Object] }, 
    prev: 
     { type: 'text', 
     data: '\n  ', 
     parent: [Object], 
     prev: [Object], 
     next: [Circular] }, 
    next: 
     { type: 'text', 
     data: '\n  ', 
     parent: [Object], 
     prev: [Circular], 
     next: [Object] } }, 
    '3': 
    { type: 'tag', 
    name: 'article', 
    namespace: 'http://www.w3.org/1999/xhtml', 
    attribs: {}, 
    'x-attribsNamespace': {}, 
    'x-attribsPrefix': {}, 
    children: [ [Object] ], 
    parent: 
     { type: 'tag', 
     name: 'div', 
     namespace: 'http://www.w3.org/1999/xhtml', 
     attribs: {}, 
     'x-attribsNamespace': {}, 
     'x-attribsPrefix': {}, 
     children: [Object], 
     parent: [Object], 
     prev: [Object], 
     next: [Object] }, 
    prev: 
     { type: 'text', 
     data: '\n  ', 
     parent: [Object], 
     prev: [Object], 
     next: [Circular] }, 
    next: 
     { type: 'text', 
     data: '\n  ', 
     parent: [Object], 
     prev: [Circular], 
     next: null } }, 
    options: 
    { withDomLvl1: true, 
    normalizeWhitespace: false, 
    xml: false, 
    decodeEntities: true }, 
    _root: 
    initialize { 
    '0': 
     { type: 'root', 
     name: 'root', 
     namespace: 'http://www.w3.org/1999/xhtml', 
     attribs: {}, 
     'x-attribsNamespace': {}, 
     'x-attribsPrefix': {}, 
     children: [Object], 
     parent: null, 
     prev: null, 
     next: null }, 
    options: 
     { withDomLvl1: true, 
     normalizeWhitespace: false, 
     xml: false, 
     decodeEntities: true }, 
    length: 1, 
    _root: [Circular] }, 
    length: 4, 
    prevObject: 
    initialize { 
    '0': 
     { type: 'tag', 
     name: 'div', 
     namespace: 'http://www.w3.org/1999/xhtml', 
     attribs: {}, 
     'x-attribsNamespace': {}, 
     'x-attribsPrefix': {}, 
     children: [Object], 
     parent: [Object], 
     prev: [Object], 
     next: [Object] }, 
    options: 
     { withDomLvl1: true, 
     normalizeWhitespace: false, 
     xml: false, 
     decodeEntities: true }, 
    _root: initialize { '0': [Object], options: [Object], length: 1, _root: [Circular] }, 
    length: 1, 
    prevObject: initialize { '0': [Object], options: [Object], length: 1, _root: [Circular] } } } 
html: a 

Process finished with exit code 0