[go: up one dir, main page]

Autofill: Improve inferring label from divs

Do not traverse into divs known not to contain inferable labels.

BUG=454366

Review URL: https://codereview.chromium.org/913923004

Cr-Commit-Position: refs/heads/master@{#316172}
(cherry picked from commit 41711d061f0b1ac440e88148e99c883af6df14de)

Review URL: https://codereview.chromium.org/937473002

Cr-Commit-Position: refs/branch-heads/2272@{#311}
Cr-Branched-From: 827a380cfdb31aa54c8d56e63ce2c3fd8c3ba4d4-refs/heads/master@{#310958}
diff --git a/chrome/test/data/autofill/heuristics/input/bug_454366b.html b/chrome/test/data/autofill/heuristics/input/bug_454366b.html
new file mode 100644
index 0000000..32133d4
--- /dev/null
+++ b/chrome/test/data/autofill/heuristics/input/bug_454366b.html
@@ -0,0 +1,81 @@
+<html>
+<head></head>
+<body>
+  <input type="text name="hist_state" id="hist_state" style="display: none;">
+  <div class="g-h-f-ci g-h-f-Au">
+    <div class="g-h-f-Qm" title="Browse people" guidedhelpid="shareboxpeoplepickericon" tabindex="0" style="-webkit-user-select: none;"></div>
+    <div class="g-h-f-vc g-h-f-k" style="">
+      <div class="g-h-f-vc-B b-K b-K-Xb URaP8">
+        <span class="g-h-f-N"><span class="g-h-f-N-Rm" id="sbda">+ Add names, circles, or email addresses</span>
+          <input type="text" class="g-h-f-N-N" id="sbdp" value="" placeholder="+ Add names, circles, or email addresses" aria-label="Add names, circles, or email addresses" tabindex="0" aria-haspopup="false" role="combobox" aria-autocomplete="list">
+        </span>
+        <div class="Dp">
+          <div role="button" aria-label="Posting options" tabindex="0" class="CC tm d-k-l d-r-c" aria-expanded="false" aria-haspopup="true" style="-webkit-user-select: none;">
+            <div class="d-k-l d-r-c-sa-z">
+              <div class="d-k-l d-r-c-aa-z">
+                <div class="d-k-l d-r-c-ha"></div>
+                <div class="d-k-l d-r-c-Qa">&nbsp;</div>
+              </div>
+            </div>
+          </div>
+          <div class="DC zo" style="display: none;"></div>
+          <div class="yo" style="display: none;"></div>
+          <div class="lt"></div>
+          <div class="EC"></div>
+          <div class="d-r d-r-ih cw" role="menu" aria-haspopup="true" style="display: none; -webkit-user-select: none;">
+            <div class="d-A osa Nq" role="menuitem" id=":4j" style="-webkit-user-select: none;">
+              <div class="d-A-B" style="-webkit-user-select: none;">
+                <div style="-webkit-user-select: none;">Disable comments</div>
+              </div>
+            </div>
+            <div class="d-A osa Qq" role="menuitem" id=":4k" style="-webkit-user-select: none;">
+              <div class="d-A-B" style="-webkit-user-select: none;">
+                <div style="-webkit-user-select: none;">Enable Comments</div>
+              </div>
+            </div>
+            <div class="d-A osa Rq" role="menuitem" id=":4l" style="-webkit-user-select: none;">
+              <div class="d-A-B" style="-webkit-user-select: none;">
+                <div style="-webkit-user-select: none;">Disable reshares</div>
+              </div>
+            </div>
+            <div class="d-A osa Sq" role="menuitem" id=":4m" style="-webkit-user-select: none;">
+              <div class="d-A-B" style="-webkit-user-select: none;">
+                <div style="-webkit-user-select: none;">Enable reshares</div>
+              </div>
+            </div>
+            <div class="d-A osa Oq" role="menuitem" id=":4n" style="-webkit-user-select: none;">
+              <div class="d-A-B" style="-webkit-user-select: none;">
+                <div style="-webkit-user-select: none;">Keep within <b style="-webkit-user-select: none;">Google.com</b>
+                </div>
+              </div>
+            </div>
+            <div class="d-A osa Pq" role="menuitem" id=":4o" style="-webkit-user-select: none;">
+              <div class="d-A-B" style="-webkit-user-select: none;"></div>
+            </div>
+          </div>
+        </div>
+        <span role="list" aria-atomic="true" aria-live="assertive" aria-relevant="all"></span>
+      </div>
+      <div class="g-h-f-V-nb" style="display: none;">
+        <div class="g-h-f-V-bd"></div>
+        <div class="d-r-Gk g-h-f-V b-K b-K-Xb URaP8 g-h-f-V-Lb" role="listbox" id=":4v" style="opacity: 0; -webkit-user-select: none; display: none;">
+          <div class="d-A" id=":4w" role="option" style="-webkit-user-select: none;">
+            <div class="d-A-B" style="-webkit-user-select: none;">
+              <span class="g-h-f-m-Ed-wc-E g-h-f-m" style="-webkit-user-select: none;"></span> <span class="g-h-f-za-B" style="-webkit-user-select: none;">Your circles</span>
+            </div>
+          </div>
+          <div class="d-A" id=":4y" role="option" style="-webkit-user-select: none;">
+            <div class="d-A-B" style="-webkit-user-select: none;">
+              <span class="g-h-f-m-de-E g-h-f-m" style="-webkit-user-select: none;"></span> <span class="g-h-f-za-B" style="-webkit-user-select: none;">Extended circles</span>
+            </div>
+          </div>
+          <div class="d-cm" style="-webkit-user-select: none;"></div>
+        </div>
+      </div>
+    </div>
+    <div class="g-h-f-CJ" style="display: none;">
+      <input type="text" class="g-h-f-iK">
+    </div>
+  </div>
+</body>
+</html>
diff --git a/chrome/test/data/autofill/heuristics/output/08_register_aol.com.out b/chrome/test/data/autofill/heuristics/output/08_register_aol.com.out
index 94ac64a7..7544aaf 100644
--- a/chrome/test/data/autofill/heuristics/output/08_register_aol.com.out
+++ b/chrome/test/data/autofill/heuristics/output/08_register_aol.com.out
@@ -5,8 +5,8 @@
 UNKNOWN_TYPE | verifyPasswordHint | Password | Retype password | {actionForm.firstName}_1-default
 UNKNOWN_TYPE | {actionForm.verifyPassword} | Password |  | {actionForm.firstName}_1-default
 UNKNOWN_TYPE | wlw-select_key:{actionForm.dobMonth} | Select Month |  | {actionForm.firstName}_1-default
-UNKNOWN_TYPE | {actionForm.dobDay} | Select Month | Day (dd) | {actionForm.firstName}_1-default
-UNKNOWN_TYPE | {actionForm.dobYear} | Select Month | Year (yyyy) | {actionForm.firstName}_1-default
+UNKNOWN_TYPE | {actionForm.dobDay} |  | Day (dd) | {actionForm.firstName}_1-default
+UNKNOWN_TYPE | {actionForm.dobYear} |  | Year (yyyy) | {actionForm.firstName}_1-default
 UNKNOWN_TYPE | wlw-radio_button_group_key:{actionForm.gender} | Female | Female | {actionForm.firstName}_1-default
 UNKNOWN_TYPE | wlw-radio_button_group_key:{actionForm.gender} | Male | Male | {actionForm.firstName}_1-default
 ADDRESS_HOME_ZIP | {actionForm.zipCode} | Zip Code |  | {actionForm.firstName}_1-default
diff --git a/chrome/test/data/autofill/heuristics/output/25_checkout_m_llbean.com.out b/chrome/test/data/autofill/heuristics/output/25_checkout_m_llbean.com.out
index 3f19903..68060b3 100644
--- a/chrome/test/data/autofill/heuristics/output/25_checkout_m_llbean.com.out
+++ b/chrome/test/data/autofill/heuristics/output/25_checkout_m_llbean.com.out
@@ -13,7 +13,7 @@
 ADDRESS_HOME_LINE3 | _1_address3 | Address Line 3 (optional) |  | _1_personTitle_1-default
 ADDRESS_HOME_STATE | _1_zipCode | Enter Zip for City and State |  | _1_personTitle_1-default
 ADDRESS_HOME_CITY | _1_city | City |  | _1_personTitle_1-default
-ADDRESS_HOME_STATE | _1_state | City |  | _1_personTitle_1-default
+ADDRESS_HOME_STATE | _1_state |  |  | _1_personTitle_1-default
 ADDRESS_HOME_ZIP | _1_CAPostal | Postal Code |  | _1_personTitle_1-default
 ADDRESS_HOME_CITY | _1_CACity | City |  | _1_personTitle_1-default
 ADDRESS_HOME_STATE | _1_CAProvince | Province |  | _1_personTitle_1-default
diff --git a/chrome/test/data/autofill/heuristics/output/bug_454366b.out b/chrome/test/data/autofill/heuristics/output/bug_454366b.out
new file mode 100644
index 0000000..8d7d1985
--- /dev/null
+++ b/chrome/test/data/autofill/heuristics/output/bug_454366b.out
@@ -0,0 +1,3 @@
+UNKNOWN_TYPE | hist_state |  |  | hist_state_1-default
+UNKNOWN_TYPE | sbdp | + Add names, circles, or email addresses |  | hist_state_1-default
+UNKNOWN_TYPE |  |  |  | hist_state_1-default
diff --git a/components/autofill/content/renderer/form_autofill_util.cc b/components/autofill/content/renderer/form_autofill_util.cc
index 5c7e2c5..bea44e3 100644
--- a/components/autofill/content/renderer/form_autofill_util.cc
+++ b/components/autofill/content/renderer/form_autofill_util.cc
@@ -5,6 +5,7 @@
 #include "components/autofill/content/renderer/form_autofill_util.h"
 
 #include <map>
+#include <set>
 
 #include "base/command_line.h"
 #include "base/logging.h"
@@ -169,13 +170,16 @@
 
 // This is a helper function for the FindChildText() function (see below).
 // Search depth is limited with the |depth| parameter.
-base::string16 FindChildTextInner(const WebNode& node, int depth) {
+// |divs_to_skip| is the set of <div> nodes to ignore if encountered.
+base::string16 FindChildTextInner(const WebNode& node,
+                                  int depth,
+                                  const std::set<WebNode>& divs_to_skip) {
   if (depth <= 0 || node.isNull())
     return base::string16();
 
   // Skip over comments.
   if (node.nodeType() == WebNode::CommentNode)
-    return FindChildTextInner(node.nextSibling(), depth - 1);
+    return FindChildTextInner(node.nextSibling(), depth - 1, divs_to_skip);
 
   if (node.nodeType() != WebNode::ElementNode &&
       node.nodeType() != WebNode::TextNode)
@@ -191,6 +195,9 @@
          IsAutofillableElement(element.toConst<WebFormControlElement>()))) {
       return base::string16();
     }
+
+    if (element.hasHTMLTagName("div") && ContainsKey(divs_to_skip, node))
+      return base::string16();
   }
 
   // Extract the text exactly at this node.
@@ -198,20 +205,38 @@
 
   // Recursively compute the children's text.
   // Preserve inter-element whitespace separation.
-  base::string16 child_text = FindChildTextInner(node.firstChild(), depth - 1);
+  base::string16 child_text =
+      FindChildTextInner(node.firstChild(), depth - 1, divs_to_skip);
   bool add_space = node.nodeType() == WebNode::TextNode && node_text.empty();
   node_text = CombineAndCollapseWhitespace(node_text, child_text, add_space);
 
   // Recursively compute the siblings' text.
   // Again, preserve inter-element whitespace separation.
   base::string16 sibling_text =
-      FindChildTextInner(node.nextSibling(), depth - 1);
+      FindChildTextInner(node.nextSibling(), depth - 1, divs_to_skip);
   add_space = node.nodeType() == WebNode::TextNode && node_text.empty();
   node_text = CombineAndCollapseWhitespace(node_text, sibling_text, add_space);
 
   return node_text;
 }
 
+// Like FindChildText() below, but ignores any <div> found in |divs_to_skip|.
+// TODO(thestig): See if other FindChildText() callers can benefit from this.
+base::string16 FindChildTextWithIgnoreList(
+    const WebNode& node,
+    const std::set<WebNode>& divs_to_skip) {
+  if (node.isTextNode())
+    return node.nodeValue();
+
+  WebNode child = node.firstChild();
+
+  const int kChildSearchDepth = 10;
+  base::string16 node_text =
+      FindChildTextInner(child, kChildSearchDepth, divs_to_skip);
+  base::TrimWhitespace(node_text, base::TRIM_ALL, &node_text);
+  return node_text;
+}
+
 // Returns the aggregated values of the descendants of |element| that are
 // non-empty text nodes.  This is a faster alternative to |innerText()| for
 // performance critical operations.  It does a full depth-first search so can be
@@ -219,15 +244,7 @@
 // |innerText()|, the search depth and breadth are limited to a fixed threshold.
 // Whitespace is trimmed from text accumulated at descendant nodes.
 base::string16 FindChildText(const WebNode& node) {
-  if (node.isTextNode())
-    return node.nodeValue();
-
-  WebNode child = node.firstChild();
-
-  const int kChildSearchDepth = 10;
-  base::string16 node_text = FindChildTextInner(child, kChildSearchDepth);
-  base::TrimWhitespace(node_text, base::TRIM_ALL, &node_text);
-  return node_text;
+  return FindChildTextWithIgnoreList(node, std::set<WebNode>());
 }
 
 // Shared function for InferLabelFromPrevious() and InferLabelFromNext().
@@ -408,6 +425,7 @@
 base::string16 InferLabelFromDivTable(const WebFormControlElement& element) {
   WebNode node = element.parentNode();
   bool looking_for_parent = true;
+  std::set<WebNode> divs_to_skip;
 
   // Search the sibling and parent <div>s until we find a candidate label.
   base::string16 inferred_label;
@@ -416,15 +434,21 @@
   CR_DEFINE_STATIC_LOCAL(WebString, kFieldSet, ("fieldset"));
   while (inferred_label.empty() && !node.isNull()) {
     if (HasTagName(node, kDiv)) {
-      inferred_label = FindChildText(node);
+      if (looking_for_parent)
+        inferred_label = FindChildTextWithIgnoreList(node, divs_to_skip);
+      else
+        inferred_label = FindChildText(node);
+
       // Avoid sibling DIVs that contain autofillable fields.
       if (!looking_for_parent && !inferred_label.empty()) {
         CR_DEFINE_STATIC_LOCAL(WebString, kSelector,
                                ("input, select, textarea"));
         blink::WebExceptionCode ec = 0;
         WebElement result_element = node.querySelector(kSelector, ec);
-        if (!result_element.isNull())
+        if (!result_element.isNull()) {
           inferred_label.clear();
+          divs_to_skip.insert(node);
+        }
       }
 
       looking_for_parent = false;